From 6bd25a6f90cdef6b70b2267a49b77a3d47b7a532 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 09:07:43 -0500
Subject: [PATCH 001/162] js pbar improvements
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++-
javascript/progressBar.js | 40 +++++++++++++++++++++++++++++++--------
modules/sd_models.py | 8 ++++----
3 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc6cd163b..0dabf0d7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-22
+## Update for 2024-11-23
- Model loader improvements:
- detect model components on model load fail
@@ -16,6 +16,7 @@
- fix README links
- fix sdxl controlnet single-file loader
- relax settings validator
+ - improve js progress calls resiliency
## Update for 2024-11-21
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index a9ecb31e9..3f954e13b 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -4,23 +4,37 @@ function request(url, data, handler, errorHandler) {
const xhr = new XMLHttpRequest();
xhr.open('POST', url, true);
xhr.setRequestHeader('Content-Type', 'application/json');
+ xhr.timeout = 5000;
+ xhr.ontimeout = () => {
+ console.error('xhr.ontimeout', xhr);
+ errorHandler();
+ };
+ xhr.onerror = () => {
+ console.error('xhr.onerror', xhr);
+ errorHandler();
+ };
+ xhr.onabort = () => {
+ console.error('xhr.onabort', xhr);
+ errorHandler();
+ };
xhr.onreadystatechange = () => {
if (xhr.readyState === 4) {
if (xhr.status === 200) {
try {
- const js = JSON.parse(xhr.responseText);
- handler(js);
- } catch (error) {
- console.error(error);
+ const json = JSON.parse(xhr.responseText);
+ handler(json);
+ } catch (err) {
+ console.error('xhr.onreadystatechange', xhr, err);
errorHandler();
}
} else {
+ console.error('xhr.onreadystatechange', xhr);
errorHandler();
}
}
};
- const js = JSON.stringify(data);
- xhr.send(js);
+ const req = JSON.stringify(data);
+ xhr.send(req);
}
function pad2(x) {
@@ -118,11 +132,14 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
const start = (id_task, id_live_preview) => { // eslint-disable-line no-shadow
if (!opts.live_previews_enable || opts.live_preview_refresh_period === 0 || opts.show_progress_every_n_steps === 0) return;
- request('./internal/progress', { id_task, id_live_preview }, (res) => {
+
+ const onProgressHandler = (res) => {
+ // debug('onProgress', res);
lastState = res;
const elapsedFromStart = (new Date() - dateStart) / 1000;
hasStarted |= res.active;
if (res.completed || (!res.active && (hasStarted || once)) || (elapsedFromStart > 30 && !res.queued && res.progress === prevProgress)) {
+ debug('onProgressEnd', res);
done();
return;
}
@@ -131,7 +148,14 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
if (res.live_preview && galleryEl) img.src = res.live_preview;
if (onProgress) onProgress(res);
setTimeout(() => start(id_task, id_live_preview), opts.live_preview_refresh_period || 500);
- }, done);
+ };
+
+ const onProgressErrorHandler = (err) => {
+ console.error('onProgressError', err);
+ done();
+ };
+
+ request('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
};
start(id_task, 0);
}
diff --git a/modules/sd_models.py b/modules/sd_models.py
index cf1921a36..a6ff19b6f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -319,12 +319,12 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
if not (hasattr(sd_model, "has_accelerate") and sd_model.has_accelerate):
sd_model.has_accelerate = False
if hasattr(sd_model, 'maybe_free_model_hooks') and shared.opts.diffusers_offload_mode == "none":
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
sd_model.maybe_free_model_hooks()
sd_model.has_accelerate = False
if hasattr(sd_model, "enable_model_cpu_offload") and shared.opts.diffusers_offload_mode == "model":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
shared.opts.diffusers_move_base = False
shared.opts.diffusers_move_unet = False
@@ -339,7 +339,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if hasattr(sd_model, "enable_sequential_cpu_offload") and shared.opts.diffusers_offload_mode == "sequential":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
shared.opts.diffusers_move_base = False
shared.opts.diffusers_move_unet = False
@@ -359,7 +359,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if shared.opts.diffusers_offload_mode == "balanced":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} threshold={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction}')
sd_model = apply_balanced_offload(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
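The offload debug lines above now report the configured CUDA memory limit alongside the offload mode. As a hedged aside, the sketch below shows how a fractional per-process VRAM cap of this kind is typically applied in PyTorch; it assumes `cuda_mem_fraction` ultimately feeds `torch.cuda.set_per_process_memory_fraction`, which is not shown in this patch.

```python
# Hedged sketch: applying a fractional CUDA memory limit such as `cuda_mem_fraction`.
# Assumes the option maps onto PyTorch's per-process cap; the SD.Next wiring itself
# is outside this patch.
import torch

def apply_cuda_mem_fraction(fraction: float, device: int = 0) -> None:
    """Cap this process to `fraction` of the device's total VRAM."""
    if not torch.cuda.is_available():
        return
    if 0.0 < fraction < 1.0:
        torch.cuda.set_per_process_memory_fraction(fraction, device)

apply_cuda_mem_fraction(0.8, device=0)  # e.g. limit the process to 80% of GPU 0
```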
From 8a1eaedc82b53902aa780848262297c91edea02e Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 10:47:06 -0500
Subject: [PATCH 002/162] browser to server logging
Signed-off-by: Vladimir Mandic
---
.eslintrc.json | 7 ++--
CHANGELOG.md | 2 ++
installer.py | 2 +-
javascript/extraNetworks.js | 15 +--------
javascript/gallery.js | 4 +--
javascript/loader.js | 8 +++--
javascript/logMonitor.js | 15 ++++++---
javascript/logger.js | 67 +++++++++++++++++++++++++++++++++++++
javascript/progressBar.js | 41 ++---------------------
javascript/script.js | 16 +--------
javascript/ui.js | 6 ++--
modules/api/api.py | 3 +-
modules/api/models.py | 8 ++++-
modules/api/server.py | 12 ++++++-
14 files changed, 120 insertions(+), 86 deletions(-)
create mode 100644 javascript/logger.js
diff --git a/.eslintrc.json b/.eslintrc.json
index 62feb13a5..2dddb41a1 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -42,9 +42,13 @@
"globals": {
// assets
"panzoom": "readonly",
- // script.js
+ // logger.js
"log": "readonly",
"debug": "readonly",
+ "error": "readonly",
+ "xhrGet": "readonly",
+ "xhrPost": "readonly",
+ // script.js
"gradioApp": "readonly",
"executeCallbacks": "readonly",
"onAfterUiUpdate": "readonly",
@@ -87,7 +91,6 @@
// settings.js
"registerDragDrop": "readonly",
// extraNetworks.js
- "requestGet": "readonly",
"getENActiveTab": "readonly",
"quickApplyStyle": "readonly",
"quickSaveStyle": "readonly",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dabf0d7c..4a4a0a416 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@
- Flux: do not recast quants
- Sampler improvements
- update DPM FlowMatch samplers
+- UI:
+ - browser->server logging
- Fixes:
- update `diffusers`
- fix README links
diff --git a/installer.py b/installer.py
index 0b64c3616..8f552526a 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = 'b5fd6f13f5434d69d919cc8cedf0b11db664cf06'
+ sha = '7ac6e286ee994270e737b70c904ea50049d53567'
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js
index 77fe125f3..622e40faf 100644
--- a/javascript/extraNetworks.js
+++ b/javascript/extraNetworks.js
@@ -3,19 +3,6 @@ let sortVal = -1;
// helpers
-const requestGet = (url, data, handler) => {
- const xhr = new XMLHttpRequest();
- const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
- xhr.open('GET', `${url}?${args}`, true);
- xhr.onreadystatechange = () => {
- if (xhr.readyState === 4) {
- if (xhr.status === 200) handler(JSON.parse(xhr.responseText));
- else console.error(`Request: url=${url} status=${xhr.status} err`);
- }
- };
- xhr.send(JSON.stringify(data));
-};
-
const getENActiveTab = () => {
let tabName = '';
if (gradioApp().getElementById('tab_txt2img').style.display === 'block') tabName = 'txt2img';
@@ -98,7 +85,7 @@ function readCardTags(el, tags) {
}
function readCardDescription(page, item) {
- requestGet('/sd_extra_networks/description', { page, item }, (data) => {
+ xhrGet('/sd_extra_networks/description', { page, item }, (data) => {
const tabname = getENActiveTab();
const description = gradioApp().querySelector(`#${tabname}_description > label > textarea`);
description.value = data?.description?.trim() || '';
diff --git a/javascript/gallery.js b/javascript/gallery.js
index 1f3afd148..05e594e4c 100644
--- a/javascript/gallery.js
+++ b/javascript/gallery.js
@@ -94,14 +94,14 @@ async function delayFetchThumb(fn) {
outstanding++;
const res = await fetch(`/sdapi/v1/browser/thumb?file=${encodeURI(fn)}`, { priority: 'low' });
if (!res.ok) {
- console.error(res.statusText);
+ error(`fetchThumb: ${res.statusText}`);
outstanding--;
return undefined;
}
const json = await res.json();
outstanding--;
if (!res || !json || json.error || Object.keys(json).length === 0) {
- if (json.error) console.error(json.error);
+ if (json.error) error(`fetchThumb: ${json.error}`);
return undefined;
}
return json;
diff --git a/javascript/loader.js b/javascript/loader.js
index f3c7fe60f..8cd4811bf 100644
--- a/javascript/loader.js
+++ b/javascript/loader.js
@@ -20,7 +20,7 @@ async function preloadImages() {
try {
await Promise.all(imagePromises);
} catch (error) {
- console.error('Error preloading images:', error);
+ error(`preloadImages: ${error}`);
}
}
@@ -43,14 +43,16 @@ async function createSplash() {
const motdEl = document.getElementById('motd');
if (motdEl) motdEl.innerHTML = text.replace(/["]+/g, '');
})
- .catch((err) => console.error('getMOTD:', err));
+ .catch((err) => error(`getMOTD: ${err}`));
}
async function removeSplash() {
const splash = document.getElementById('splash');
if (splash) splash.remove();
log('removeSplash');
- log('startupTime', Math.round(performance.now() - appStartTime) / 1000);
+ const t = Math.round(performance.now() - appStartTime) / 1000;
+ log('startupTime', t);
+ xhrPost('/sdapi/v1/log', { message: `ready time=${t}` });
}
window.onload = createSplash;
diff --git a/javascript/logMonitor.js b/javascript/logMonitor.js
index e4fe99a7f..9b915e6da 100644
--- a/javascript/logMonitor.js
+++ b/javascript/logMonitor.js
@@ -2,6 +2,7 @@ let logMonitorEl = null;
let logMonitorStatus = true;
let logWarnings = 0;
let logErrors = 0;
+let logConnected = false;
function dateToStr(ts) {
const dt = new Date(1000 * ts);
@@ -29,8 +30,7 @@ async function logMonitor() {
row.innerHTML = `${dateToStr(l.created)} | ${level}${l.facility} | ${module}${l.msg} | `;
logMonitorEl.appendChild(row);
} catch (e) {
- // console.log('logMonitor', e);
- console.error('logMonitor line', line);
+ error(`logMonitor: ${line}`);
}
};
@@ -46,6 +46,7 @@ async function logMonitor() {
if (logMonitorStatus) setTimeout(logMonitor, opts.logmonitor_refresh_period);
else setTimeout(logMonitor, 10 * 1000); // on failure try to reconnect every 10sec
+
if (!opts.logmonitor_show) return;
logMonitorStatus = false;
if (!logMonitorEl) {
@@ -64,14 +65,20 @@ async function logMonitor() {
const lines = await res.json();
if (logMonitorEl && lines?.length > 0) logMonitorEl.parentElement.parentElement.style.display = opts.logmonitor_show ? 'block' : 'none';
for (const line of lines) addLogLine(line);
+ if (!logConnected) {
+ logConnected = true;
+ xhrPost('/sdapi/v1/log', { debug: 'connected' });
+ }
} else {
- addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: ${res?.status} ${res?.statusText}" }`);
+ logConnected = false;
logErrors++;
+ addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: ${res?.status} ${res?.statusText}" }`);
}
cleanupLog(atBottom);
} catch (err) {
- addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: server unreachable" }`);
+ logConnected = false;
logErrors++;
+ addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: server unreachable" }`);
cleanupLog(atBottom);
}
}
diff --git a/javascript/logger.js b/javascript/logger.js
new file mode 100644
index 000000000..4ff1fb822
--- /dev/null
+++ b/javascript/logger.js
@@ -0,0 +1,67 @@
+const serverTimeout = 5000;
+
+const log = async (...msg) => {
+ const dt = new Date();
+ const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
+ if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
+ console.log(ts, ...msg); // eslint-disable-line no-console
+};
+
+const debug = async (...msg) => {
+ const dt = new Date();
+ const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
+ if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
+ console.debug(ts, ...msg); // eslint-disable-line no-console
+};
+
+const error = async (...msg) => {
+ const dt = new Date();
+ const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
+ if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
+ console.error(ts, ...msg); // eslint-disable-line no-console
+ xhrPost('/sdapi/v1/log', { error: msg.join(' ') }); // eslint-disable-line no-use-before-define
+};
+
+const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+ const err = (msg) => {
+ if (!ignore) {
+ error(`${msg}: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
+ if (errorHandler) errorHandler();
+ }
+ };
+
+ xhrObj.setRequestHeader('Content-Type', 'application/json');
+ xhrObj.timeout = serverTimeout;
+ xhrObj.ontimeout = () => err('xhr.ontimeout');
+ xhrObj.onerror = () => err('xhr.onerror');
+ xhrObj.onabort = () => err('xhr.onabort');
+ xhrObj.onreadystatechange = () => {
+ if (xhrObj.readyState === 4) {
+ if (xhrObj.status === 200) {
+ try {
+ const json = JSON.parse(xhrObj.responseText);
+ if (handler) handler(json);
+ } catch (e) {
+ error(`xhr.onreadystatechange: ${e}`);
+ }
+ } else {
+ err(`xhr.onreadystatechange: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
+ }
+ }
+ };
+ const req = JSON.stringify(data);
+ xhrObj.send(req);
+};
+
+const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+ const xhr = new XMLHttpRequest();
+ const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
+ xhr.open('GET', `${url}?${args}`, true);
+ xhrInternal(xhr, data, handler, errorHandler, ignore);
+};
+
+function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false) {
+ const xhr = new XMLHttpRequest();
+ xhr.open('POST', url, true);
+ xhrInternal(xhr, data, handler, errorHandler, ignore);
+}
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index 3f954e13b..9d897bc87 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -1,42 +1,5 @@
let lastState = {};
-function request(url, data, handler, errorHandler) {
- const xhr = new XMLHttpRequest();
- xhr.open('POST', url, true);
- xhr.setRequestHeader('Content-Type', 'application/json');
- xhr.timeout = 5000;
- xhr.ontimeout = () => {
- console.error('xhr.ontimeout', xhr);
- errorHandler();
- };
- xhr.onerror = () => {
- console.error('xhr.onerror', xhr);
- errorHandler();
- };
- xhr.onabort = () => {
- console.error('xhr.onabort', xhr);
- errorHandler();
- };
- xhr.onreadystatechange = () => {
- if (xhr.readyState === 4) {
- if (xhr.status === 200) {
- try {
- const json = JSON.parse(xhr.responseText);
- handler(json);
- } catch (err) {
- console.error('xhr.onreadystatechange', xhr, err);
- errorHandler();
- }
- } else {
- console.error('xhr.onreadystatechange', xhr);
- errorHandler();
- }
- }
- };
- const req = JSON.stringify(data);
- xhr.send(req);
-}
-
function pad2(x) {
return x < 10 ? `0${x}` : x;
}
@@ -151,11 +114,11 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
};
const onProgressErrorHandler = (err) => {
- console.error('onProgressError', err);
+ error(`onProgressError: ${err}`);
done();
};
- request('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
+ xhrPost('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
};
start(id_task, 0);
}
diff --git a/javascript/script.js b/javascript/script.js
index 104567dd7..250e90ba2 100644
--- a/javascript/script.js
+++ b/javascript/script.js
@@ -1,17 +1,3 @@
-const log = (...msg) => {
- const dt = new Date();
- const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
- if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
- console.log(ts, ...msg); // eslint-disable-line no-console
-};
-
-const debug = (...msg) => {
- const dt = new Date();
- const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
- if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
- console.debug(ts, ...msg); // eslint-disable-line no-console
-};
-
async function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms)); // eslint-disable-line no-promise-executor-return
}
@@ -82,7 +68,7 @@ function executeCallbacks(queue, arg) {
try {
callback(arg);
} catch (e) {
- console.error('error running callback', callback, ':', e);
+ error(`executeCallbacks: ${callback} ${e}`);
}
}
}
diff --git a/javascript/ui.js b/javascript/ui.js
index 8808f1c8b..81d1c67e4 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -28,7 +28,7 @@ function clip_gallery_urls(gallery) {
const files = gallery.map((v) => v.data);
navigator.clipboard.writeText(JSON.stringify(files)).then(
() => log('clipboard:', files),
- (err) => console.error('clipboard:', files, err),
+ (err) => error(`clipboard: ${files} ${err}`),
);
}
@@ -493,9 +493,9 @@ function previewTheme() {
el.src = `/file=html/${name}.jpg`;
}
})
- .catch((e) => console.error('previewTheme:', e));
+ .catch((e) => error(`previewTheme: ${e}`));
})
- .catch((e) => console.error('previewTheme:', e));
+ .catch((e) => error(`previewTheme: ${e}`));
}
async function browseFolder() {
diff --git a/modules/api/api.py b/modules/api/api.py
index f8346995d..d48cbf521 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -35,7 +35,8 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
# server api
self.add_api_route("/sdapi/v1/motd", server.get_motd, methods=["GET"], response_model=str)
- self.add_api_route("/sdapi/v1/log", server.get_log_buffer, methods=["GET"], response_model=List[str])
+ self.add_api_route("/sdapi/v1/log", server.get_log, methods=["GET"], response_model=List[str])
+ self.add_api_route("/sdapi/v1/log", server.post_log, methods=["POST"])
self.add_api_route("/sdapi/v1/start", self.get_session_start, methods=["GET"])
self.add_api_route("/sdapi/v1/version", server.get_version, methods=["GET"])
self.add_api_route("/sdapi/v1/status", server.get_status, methods=["GET"], response_model=models.ResStatus)
diff --git a/modules/api/models.py b/modules/api/models.py
index e68ebf081..39bcbe383 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -286,10 +286,16 @@ class ResImageInfo(BaseModel):
items: dict = Field(title="Items", description="A dictionary containing all the other fields the image had")
parameters: dict = Field(title="Parameters", description="A dictionary with parsed generation info fields")
-class ReqLog(BaseModel):
+class ReqGetLog(BaseModel):
lines: int = Field(default=100, title="Lines", description="How many lines to return")
clear: bool = Field(default=False, title="Clear", description="Should the log be cleared after returning the lines?")
+
+class ReqPostLog(BaseModel):
+ message: Optional[str] = Field(title="Message", description="The info message to log")
+ debug: Optional[str] = Field(title="Debug message", description="The debug message to log")
+ error: Optional[str] = Field(title="Error message", description="The error message to log")
+
class ReqProgress(BaseModel):
skip_current_image: bool = Field(default=False, title="Skip current image", description="Skip current image serialization")
diff --git a/modules/api/server.py b/modules/api/server.py
index 939e19c86..dabbe634c 100644
--- a/modules/api/server.py
+++ b/modules/api/server.py
@@ -37,12 +37,22 @@ def get_platform():
from modules.loader import get_packages as loader_get_packages
return { **installer_get_platform(), **loader_get_packages() }
-def get_log_buffer(req: models.ReqLog = Depends()):
+def get_log(req: models.ReqGetLog = Depends()):
lines = shared.log.buffer[:req.lines] if req.lines > 0 else shared.log.buffer.copy()
if req.clear:
shared.log.buffer.clear()
return lines
+def post_log(req: models.ReqPostLog):
+ if req.message is not None:
+ shared.log.info(f'UI: {req.message}')
+ if req.debug is not None:
+ shared.log.debug(f'UI: {req.debug}')
+ if req.error is not None:
+ shared.log.error(f'UI: {req.error}')
+ return {}
+
+
def get_config():
options = {}
for k in shared.opts.data.keys():
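Patch 002 splits the log route into a GET that drains the server-side buffer and a POST that lets the browser forward its own messages (`ReqGetLog`, `ReqPostLog`, `get_log` and `post_log` above). A rough usage sketch from any HTTP client follows; the base URL and the absence of authentication are assumptions.

```python
# Hedged usage sketch for the /sdapi/v1/log endpoints added in this patch.
# Assumes a local, unauthenticated SD.Next instance; adjust the base URL as needed.
import requests

BASE = 'http://127.0.0.1:7860'  # assumed default host/port

# GET: fetch up to 50 buffered log lines without clearing the buffer (ReqGetLog)
lines = requests.get(f'{BASE}/sdapi/v1/log', params={'lines': 50, 'clear': False}, timeout=5).json()
for line in lines:
    print(line)

# POST: forward a client-side message; the server logs it with a 'UI:' prefix (ReqPostLog)
requests.post(f'{BASE}/sdapi/v1/log', json={'error': 'example client-side error'}, timeout=5)
```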
From 67c6b93213c20d830a00f0c7e6b429527bf6c725 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 15:27:01 -0500
Subject: [PATCH 003/162] flux tools support
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 22 +++++-
TODO.md | 2 +-
installer.py | 4 +-
javascript/logger.js | 3 +-
modules/model_flux.py | 12 +++-
modules/modelloader.py | 3 +
modules/processing_diffusers.py | 6 +-
modules/sd_checkpoint.py | 3 +-
modules/sd_models.py | 10 ++-
scripts/animatediff.py | 2 +-
scripts/cogvideo.py | 2 +-
scripts/flux_tools.py | 115 ++++++++++++++++++++++++++++++++
scripts/image2video.py | 2 +-
wiki | 2 +-
14 files changed, 170 insertions(+), 18 deletions(-)
create mode 100644 scripts/flux_tools.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a4a0a416..ad77bbbe5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,23 @@
## Update for 2024-11-23
+- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
+  **Redux** is actually a tool, while **Fill** is an inpaint/outpaint-optimized version of *Flux-dev*
+  **Canny** & **Depth** are optimized versions of *Flux-dev* for their respective tasks: they are *not* ControlNets that work on top of a model
+  To use, go to the image or control interface and select *Flux Tools* in scripts
+ All models are auto-downloaded on first use
+ *note*: All models are [gated](https://github.com/vladmandic/automatic/wiki/Gated) and require acceptance of terms and conditions via web page
+ *recommended*: Enable on-the-fly [quantization](https://github.com/vladmandic/automatic/wiki/Quantization) or [compression](https://github.com/vladmandic/automatic/wiki/NNCF-Compression) to reduce resource usage
+ *todo*: support for Canny/Depth LoRAs
+ - [Redux](https://huggingface.co/black-forest-labs/FLUX.1-Redux-dev): ~0.1GB
+    works together with the existing model: it analyzes the input image and uses the result in place of the prompt
+ *recommended*: low denoise strength levels result in more variety
+ - [Fill](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev): ~23.8GB, replaces currently loaded model
+ *note*: can be used in inpaint/outpaint mode only
+ - [Canny](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev): ~23.8GB, replaces currently loaded model
+ *recommended*: guidance scale 30
+ - [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
+ *recommended*: guidance scale 10
- Model loader improvements:
- detect model components on model load fail
- Flux, SD35: force unload model
@@ -11,14 +28,15 @@
- Flux: do not recast quants
- Sampler improvements
- update DPM FlowMatch samplers
-- UI:
+- UI:
- browser->server logging
- Fixes:
- update `diffusers`
- fix README links
- fix sdxl controlnet single-file loader
- relax settings validator
- - improve js progress calls resiliency
+ - improve js progress calls resiliency
+ - fix text-to-video pipeline
## Update for 2024-11-21
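The Redux entry in the changelog above describes the tool as analyzing the input image and using the result in place of the prompt. As a rough standalone illustration with `diffusers` (the pipeline class and model IDs come from `scripts/flux_tools.py` added later in this patch; dtype, step count, guidance and file names are illustrative assumptions):

```python
# Hedged sketch of the Redux flow outside SD.Next: the prior pipeline turns an input
# image into conditioning embeddings that stand in for the text prompt.
import torch
from diffusers import FluxPriorReduxPipeline, FluxPipeline
from diffusers.utils import load_image

dtype = torch.bfloat16  # assumed
redux = FluxPriorReduxPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-Redux-dev', torch_dtype=dtype,
).to('cuda')  # scripts/flux_tools.py additionally pins revision='refs/pr/8'
pipe = FluxPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-dev',
    text_encoder=None, text_encoder_2=None,  # prompt is unused with Redux
    torch_dtype=dtype,
).to('cuda')

image = load_image('input.png')      # assumed local input image
prior_out = redux(image)             # image -> prompt/pooled embeddings
result = pipe(guidance_scale=2.5, num_inference_steps=20, **prior_out).images[0]
result.save('redux.png')
```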
diff --git a/TODO.md b/TODO.md
index 973e062dc..73008039d 100644
--- a/TODO.md
+++ b/TODO.md
@@ -7,9 +7,9 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- SD35 IPAdapter:
- SD35 LoRA:
- Flux IPAdapter:
-- Flux Fill/ControlNet/Redux:
- Flux NF4:
- SANA:
+- LTX-Video:
## Other
diff --git a/installer.py b/installer.py
index 8f552526a..396b53fab 100644
--- a/installer.py
+++ b/installer.py
@@ -212,7 +212,7 @@ def installed(package, friendly: str = None, reload = False, quiet = False):
if friendly:
pkgs = friendly.split()
else:
- pkgs = [p for p in package.split() if not p.startswith('-') and not p.startswith('=')]
+ pkgs = [p for p in package.split() if not p.startswith('-') and not p.startswith('=') and not p.startswith('git+')]
pkgs = [p.split('/')[-1] for p in pkgs] # get only package name if installing from url
for pkg in pkgs:
if '!=' in pkg:
@@ -295,7 +295,7 @@ def install(package, friendly: str = None, ignore: bool = False, reinstall: bool
quick_allowed = False
if args.reinstall or reinstall or not installed(package, friendly, quiet=quiet):
deps = '' if not no_deps else '--no-deps '
- res = pip(f"install{' --upgrade' if not args.uv else ''} {deps}{package}", ignore=ignore, uv=package != "uv")
+ res = pip(f"install{' --upgrade' if not args.uv else ''} {deps}{package}", ignore=ignore, uv=package != "uv" and not package.startswith('git+'))
try:
import importlib # pylint: disable=deprecated-module
importlib.reload(pkg_resources)
diff --git a/javascript/logger.js b/javascript/logger.js
index 4ff1fb822..5aa8face3 100644
--- a/javascript/logger.js
+++ b/javascript/logger.js
@@ -19,7 +19,8 @@ const error = async (...msg) => {
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
console.error(ts, ...msg); // eslint-disable-line no-console
- xhrPost('/sdapi/v1/log', { error: msg.join(' ') }); // eslint-disable-line no-use-before-define
+ const txt = msg.join(' ');
+ if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
};
const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false) => {
diff --git a/modules/model_flux.py b/modules/model_flux.py
index 17234d9a4..324e50b36 100644
--- a/modules/model_flux.py
+++ b/modules/model_flux.py
@@ -306,9 +306,17 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
model_te.loaded_te = shared.opts.sd_text_encoder
if vae is not None:
kwargs['vae'] = vae
- shared.log.debug(f'Load model: type=FLUX preloaded={list(kwargs)}')
if repo_id == 'sayakpaul/flux.1-dev-nf4':
repo_id = 'black-forest-labs/FLUX.1-dev' # workaround since sayakpaul model is missing model_index.json
+ if 'Fill' in repo_id:
+ cls = diffusers.FluxFillPipeline
+ elif 'Canny' in repo_id:
+ cls = diffusers.FluxControlPipeline
+ elif 'Depth' in repo_id:
+ cls = diffusers.FluxControlPipeline
+ else:
+ cls = diffusers.FluxPipeline
+ shared.log.debug(f'Load model: type=FLUX cls={cls.__name__} preloaded={list(kwargs)} revision={diffusers_load_config.get("revision", None)}')
for c in kwargs:
if kwargs[c].dtype == torch.float32 and devices.dtype != torch.float32:
shared.log.warning(f'Load model: type=FLUX component={c} dtype={kwargs[c].dtype} cast dtype={devices.dtype} recast')
@@ -319,7 +327,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if checkpoint_info.path.endswith('.safetensors') and os.path.isfile(checkpoint_info.path):
pipe = diffusers.FluxPipeline.from_single_file(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
else:
- pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
+ pipe = cls.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
# release memory
transformer = None
diff --git a/modules/modelloader.py b/modules/modelloader.py
index ce36a739b..b1b3930d6 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -326,6 +326,9 @@ def find_diffuser(name: str, full=False):
return [repo[0]['name']]
hf_api = hf.HfApi()
models = list(hf_api.list_models(model_name=name, library=['diffusers'], full=True, limit=20, sort="downloads", direction=-1))
+ if len(models) == 0:
+ models = list(hf_api.list_models(model_name=name, full=True, limit=20, sort="downloads", direction=-1)) # widen search
+ models = [m for m in models if m.id.startswith(name)] # filter exact
shared.log.debug(f'Searching diffusers models: {name} {len(models) > 0}')
if len(models) > 0:
if not full:
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 2164134b1..44dff811b 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -231,7 +231,8 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
output = shared.sd_model(**hires_args) # pylint: disable=not-callable
if isinstance(output, dict):
output = SimpleNamespace(**output)
- shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
+ if hasattr(output, 'images'):
+ shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
sd_models_compile.check_deepcache(enable=False)
sd_models_compile.openvino_post_compile(op="base")
except AssertionError as e:
@@ -313,7 +314,8 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
output = shared.sd_refiner(**refiner_args) # pylint: disable=not-callable
if isinstance(output, dict):
output = SimpleNamespace(**output)
- shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
+ if hasattr(output, 'images'):
+ shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
sd_models_compile.openvino_post_compile(op="refiner")
except AssertionError as e:
shared.log.info(e)
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index afc5842e4..a4d84192f 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -198,8 +198,9 @@ def get_closet_checkpoint_match(s: str):
if shared.opts.sd_checkpoint_autodownload and s.count('/') == 1:
modelloader.hf_login()
found = modelloader.find_diffuser(s, full=True)
+ found = [f for f in found if f == s]
shared.log.info(f'HF search: model="{s}" results={found}')
- if found is not None and len(found) == 1 and found[0] == s:
+ if found is not None and len(found) == 1:
checkpoint_info = CheckpointInfo(s)
checkpoint_info.type = 'huggingface'
return checkpoint_info
diff --git a/modules/sd_models.py b/modules/sd_models.py
index a6ff19b6f..2ad204b46 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -771,7 +771,7 @@ def load_diffuser_file(model_type, pipeline, checkpoint_info, diffusers_load_con
return sd_model
-def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model'): # pylint: disable=unused-argument
+def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model', revision=None): # pylint: disable=unused-argument
if timer is None:
timer = Timer()
logging.getLogger("diffusers").setLevel(logging.ERROR)
@@ -784,6 +784,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
"requires_safety_checker": False, # sd15 specific but we cant know ahead of time
# "use_safetensors": True,
}
+ if revision is not None:
+ diffusers_load_config['revision'] = revision
if shared.opts.diffusers_model_load_variant != 'default':
diffusers_load_config['variant'] = shared.opts.diffusers_model_load_variant
if shared.opts.diffusers_pipeline == 'Custom Diffusers Pipeline' and len(shared.opts.custom_diffusers_pipeline) > 0:
@@ -1077,6 +1079,8 @@ def set_diffuser_pipe(pipe, new_pipe_type):
'OmniGenPipeline',
'StableDiffusion3ControlNetPipeline',
'InstantIRPipeline',
+ 'FluxFillPipeline',
+ 'FluxControlPipeline',
]
n = getattr(pipe.__class__, '__name__', '')
@@ -1345,7 +1349,7 @@ def reload_text_encoder(initial=False):
set_t5(pipe=shared.sd_model, module='text_encoder_3', t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir)
-def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False):
+def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False, revision=None):
load_dict = shared.opts.sd_model_dict != model_data.sd_dict
from modules import lowvram, sd_hijack
checkpoint_info = info or select_checkpoint(op=op) # are we selecting model or dictionary
@@ -1390,7 +1394,7 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model',
load_model(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op)
model_data.sd_dict = shared.opts.sd_model_dict
else:
- load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op)
+ load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op, revision=revision)
if load_dict and next_checkpoint_info is not None:
model_data.sd_dict = shared.opts.sd_model_dict
shared.opts.data["sd_model_checkpoint"] = next_checkpoint_info.title
diff --git a/scripts/animatediff.py b/scripts/animatediff.py
index 4c50f9cf6..91db60915 100644
--- a/scripts/animatediff.py
+++ b/scripts/animatediff.py
@@ -189,7 +189,7 @@ def set_free_noise(frames):
class Script(scripts.Script):
def title(self):
- return 'Video AnimateDiff'
+ return 'Video: AnimateDiff'
def show(self, is_img2img):
# return scripts.AlwaysVisible if shared.native else False
diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py
index 7f2c7225e..c988c05c4 100644
--- a/scripts/cogvideo.py
+++ b/scripts/cogvideo.py
@@ -22,7 +22,7 @@
class Script(scripts.Script):
def title(self):
- return 'Video CogVideoX'
+ return 'Video: CogVideoX'
def show(self, is_img2img):
return shared.native
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
new file mode 100644
index 000000000..9a2fdbd63
--- /dev/null
+++ b/scripts/flux_tools.py
@@ -0,0 +1,115 @@
+# https://github.com/huggingface/diffusers/pull/9985
+
+import time
+import gradio as gr
+import diffusers
+from modules import scripts, processing, shared, devices, sd_models
+from installer import install
+
+
+redux_pipe: diffusers.FluxPriorReduxPipeline = None
+processor_canny = None
+processor_depth = None
+title = 'Flux Tools'
+
+
+class Script(scripts.Script):
+ def title(self):
+ return f'{title}'
+
+ def show(self, is_img2img):
+ return is_img2img if shared.native else False
+
+ def ui(self, _is_img2img): # ui elements
+ with gr.Row():
+ gr.HTML('  Flux.1 Redux')
+ with gr.Row():
+ tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None')
+ strength = gr.Checkbox(label='Override denoise strength', value=True)
+ return [tool, strength]
+
+ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True): # pylint: disable=arguments-differ
+ global redux_pipe, processor_canny, processor_depth # pylint: disable=global-statement
+ if tool is None or tool == 'None':
+ return
+ supported_model_list = ['f1']
+ if shared.sd_model_type not in supported_model_list:
+ shared.log.warning(f'{title}: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
+ return None
+ image = getattr(p, 'init_images', None)
+ if image is None or len(image) == 0:
+ shared.log.error(f'{title}: tool={tool} no init_images')
+ return None
+ else:
+ image = image[0] if isinstance(image, list) else image
+
+ shared.log.info(f'{title}: tool={tool} init')
+
+ t0 = time.time()
+ if tool == 'Redux':
+ # pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained("black-forest-labs/FLUX.1-Redux-dev", revision="refs/pr/8", torch_dtype=torch.bfloat16).to("cuda")
+ if redux_pipe is None:
+ redux_pipe = diffusers.FluxPriorReduxPipeline.from_pretrained(
+ "black-forest-labs/FLUX.1-Redux-dev",
+ revision="refs/pr/8",
+ torch_dtype=devices.dtype,
+ cache_dir=shared.opts.hfcache_dir
+ ).to(devices.device)
+ redux_output = redux_pipe(image)
+ for k, v in redux_output.items():
+ p.task_args[k] = v
+ else:
+ if redux_pipe is not None:
+ shared.log.debug(f'{title}: tool=Redux unload')
+ redux_pipe = None
+
+ if tool == 'Fill':
+ # pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16, revision="refs/pr/4").to("cuda")
+ if p.image_mask is None:
+ shared.log.error(f'{title}: tool={tool} no image_mask')
+ return None
+ if shared.sd_model.__class__.__name__ != 'FluxFillPipeline':
+ shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Fill-dev"
+ sd_models.reload_model_weights(op='model', revision="refs/pr/4")
+ p.task_args['image'] = image
+ p.task_args['mask_image'] = p.image_mask
+
+ if tool == 'Canny':
+ # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ install('controlnet-aux')
+ install('timm==0.9.16')
+ if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Canny' not in shared.opts.sd_model_checkpoint:
+ shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Canny-dev"
+ sd_models.reload_model_weights(op='model', revision="refs/pr/1")
+ if processor_canny is None:
+ from controlnet_aux import CannyDetector
+ processor_canny = CannyDetector()
+ control_image = processor_canny(image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+ p.task_args['control_image'] = control_image
+ if strength:
+ p.task_args['strength'] = None
+ else:
+ if processor_canny is not None:
+ shared.log.debug(f'{title}: tool=Canny unload processor')
+ processor_canny = None
+
+ if tool == 'Depth':
+ # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ install('git+https://github.com/asomoza/image_gen_aux.git', 'image_gen_aux')
+ if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint:
+ shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev"
+ sd_models.reload_model_weights(op='model', revision="refs/pr/1")
+ if processor_depth is None:
+ from image_gen_aux import DepthPreprocessor
+ processor_depth = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
+ control_image = processor_depth(control_image)[0].convert("RGB")
+ p.task_args['control_image'] = control_image
+ if strength:
+ p.task_args['strength'] = None
+ else:
+ if processor_depth is not None:
+ shared.log.debug(f'{title}: tool=Depth unload processor')
+ processor_depth = None
+
+ shared.log.debug(f'{title}: tool={tool} ready time={time.time() - t0:.2f}')
+ devices.torch_gc()
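The script above swaps the loaded checkpoint for the Fill/Canny/Depth variants and routes inputs through `p.task_args`. For reference, a hedged sketch of the Fill (inpaint/outpaint) path as a direct `diffusers` call, mirroring the commented reference inside the script; the prompt, mask files and guidance value are illustrative assumptions.

```python
# Hedged sketch of the Fill path as a plain diffusers call; values are illustrative.
import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image

pipe = FluxFillPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-Fill-dev', torch_dtype=torch.bfloat16,
).to('cuda')  # the script reloads this checkpoint with revision='refs/pr/4'

image = load_image('scene.png')  # assumed input image
mask = load_image('mask.png')    # assumed white-on-black mask of the area to repaint
out = pipe(
    prompt='a red sofa',         # assumed prompt
    image=image,
    mask_image=mask,
    guidance_scale=30,           # assumed; tune per content
    num_inference_steps=30,
).images[0]
out.save('fill.png')
```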
diff --git a/scripts/image2video.py b/scripts/image2video.py
index 876ed3193..5e08922ee 100644
--- a/scripts/image2video.py
+++ b/scripts/image2video.py
@@ -13,7 +13,7 @@
class Script(scripts.Script):
def title(self):
- return 'Video VGen Image-to-Video'
+ return 'Video: VGen Image-to-Video'
def show(self, is_img2img):
return is_img2img if shared.native else False
diff --git a/wiki b/wiki
index 30f3265bb..313a6b911 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 30f3265bb06ac738e4467f58be4df3fc4b49c08b
+Subproject commit 313a6b911bd239b4fa8092ed89b936428214342e
From d7489dc0fa28a96c8238e3757b14ce487aae2fe0 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 15:27:43 -0500
Subject: [PATCH 004/162] update modernui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 4647bd7f8..6bc2f504e 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 4647bd7f86be9d2783a9ba1f38acaa9bcec942d2
+Subproject commit 6bc2f504e57eb75ebac1e9ec6c212549ebcfbc18
From a2590222ed0fd58b9c99461d76502a410047cad9 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 18:16:18 -0500
Subject: [PATCH 005/162] flux tools
Signed-off-by: Vladimir Mandic
---
modules/sd_checkpoint.py | 5 ++++-
scripts/flux_tools.py | 20 ++++++++++++++------
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index a4d84192f..20654e28b 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -168,7 +168,10 @@ def update_model_hashes():
def get_closet_checkpoint_match(s: str):
if s.startswith('https://huggingface.co/'):
- s = s.replace('https://huggingface.co/', '')
+ model_name = s.replace('https://huggingface.co/', '')
+ checkpoint_info = CheckpointInfo(model_name) # create a virtual model info
+ checkpoint_info.type = 'huggingface'
+ return checkpoint_info
if s.startswith('huggingface/'):
model_name = s.replace('huggingface/', '')
checkpoint_info = CheckpointInfo(model_name) # create a virutal model info
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
index 9a2fdbd63..e5fe443b7 100644
--- a/scripts/flux_tools.py
+++ b/scripts/flux_tools.py
@@ -25,10 +25,12 @@ def ui(self, _is_img2img): # ui elements
gr.HTML('  Flux.1 Redux')
with gr.Row():
tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None')
+ with gr.Row():
+ process = gr.Checkbox(label='Preprocess input images', value=True)
strength = gr.Checkbox(label='Override denoise strength', value=True)
- return [tool, strength]
+ return [tool, strength, process]
- def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True): # pylint: disable=arguments-differ
+ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True, process: bool = True): # pylint: disable=arguments-differ
global redux_pipe, processor_canny, processor_depth # pylint: disable=global-statement
if tool is None or tool == 'None':
return
@@ -84,8 +86,11 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
if processor_canny is None:
from controlnet_aux import CannyDetector
processor_canny = CannyDetector()
- control_image = processor_canny(image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
- p.task_args['control_image'] = control_image
+ if process:
+ control_image = processor_canny(image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+ p.task_args['control_image'] = control_image
+ else:
+ p.task_args['control_image'] = image
if strength:
p.task_args['strength'] = None
else:
@@ -102,8 +107,11 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
if processor_depth is None:
from image_gen_aux import DepthPreprocessor
processor_depth = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
- control_image = processor_depth(control_image)[0].convert("RGB")
- p.task_args['control_image'] = control_image
+ if process:
+ control_image = processor_depth(image)[0].convert("RGB")
+ p.task_args['control_image'] = control_image
+ else:
+ p.task_args['control_image'] = image
if strength:
p.task_args['strength'] = None
else:
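With the new *Preprocess input images* checkbox the script either runs the Canny/Depth detectors on the init image or passes a user-supplied map straight through as `control_image`. A hedged standalone sketch of the Canny branch follows; the detector parameters match the script, the guidance value of 30 follows the changelog recommendation, and the prompt, step count and file names are illustrative.

```python
# Hedged sketch of the Canny control path outside the script: preprocess with
# controlnet_aux, then condition FluxControlPipeline on the edge map.
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image
from controlnet_aux import CannyDetector

pipe = FluxControlPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-Canny-dev', torch_dtype=torch.bfloat16,
).to('cuda')

source = load_image('photo.png')  # assumed input image
control = CannyDetector()(source, low_threshold=50, high_threshold=200,
                          detect_resolution=1024, image_resolution=1024)  # same values as the script

out = pipe(
    prompt='a watercolor landscape',  # assumed prompt
    control_image=control,
    guidance_scale=30,                # changelog-recommended value for Canny
    num_inference_steps=30,
).images[0]
out.save('canny.png')
```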
From 9c486320df8af76011502e97a292292691eef005 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 19:24:18 -0500
Subject: [PATCH 006/162] update ui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 6bc2f504e..b31453f9d 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 6bc2f504e57eb75ebac1e9ec6c212549ebcfbc18
+Subproject commit b31453f9d109456819673e8574162edb70fef73c
From cb561fa48617eb7af096e17be357057050071109 Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Sat, 23 Nov 2024 21:57:03 -0600
Subject: [PATCH 007/162] Major lora refactor: works on my machine edition
---
.../Lora/scripts/lora_script.py | 14 +-
modules/lora/extra_networks_lora.py | 151 ++++++
modules/lora/lora.py | 8 +
modules/lora/lora_convert.py | 477 ++++++++++++++++++
modules/lora/lora_extract.py | 271 ++++++++++
modules/lora/lyco_helpers.py | 66 +++
modules/lora/network.py | 187 +++++++
modules/lora/network_full.py | 26 +
modules/lora/network_glora.py | 30 ++
modules/lora/network_hada.py | 46 ++
modules/lora/network_ia3.py | 24 +
modules/lora/network_lokr.py | 57 +++
modules/lora/network_lora.py | 78 +++
modules/lora/network_norm.py | 23 +
modules/lora/network_oft.py | 81 +++
modules/lora/network_overrides.py | 49 ++
modules/lora/networks.py | 453 +++++++++++++++++
modules/lora/ui_extra_networks_lora.py | 123 +++++
modules/processing_diffusers.py | 5 +
modules/shared.py | 1 +
scripts/lora_script.py | 62 +++
21 files changed, 2225 insertions(+), 7 deletions(-)
create mode 100644 modules/lora/extra_networks_lora.py
create mode 100644 modules/lora/lora.py
create mode 100644 modules/lora/lora_convert.py
create mode 100644 modules/lora/lora_extract.py
create mode 100644 modules/lora/lyco_helpers.py
create mode 100644 modules/lora/network.py
create mode 100644 modules/lora/network_full.py
create mode 100644 modules/lora/network_glora.py
create mode 100644 modules/lora/network_hada.py
create mode 100644 modules/lora/network_ia3.py
create mode 100644 modules/lora/network_lokr.py
create mode 100644 modules/lora/network_lora.py
create mode 100644 modules/lora/network_norm.py
create mode 100644 modules/lora/network_oft.py
create mode 100644 modules/lora/network_overrides.py
create mode 100644 modules/lora/networks.py
create mode 100644 modules/lora/ui_extra_networks_lora.py
create mode 100644 scripts/lora_script.py
diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py
index ffbef47d9..dea2985b3 100644
--- a/extensions-builtin/Lora/scripts/lora_script.py
+++ b/extensions-builtin/Lora/scripts/lora_script.py
@@ -5,7 +5,7 @@
from network import NetworkOnDisk
from ui_extra_networks_lora import ExtraNetworksPageLora
from extra_networks_lora import ExtraNetworkLora
-from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models # pylint: disable=unused-import
+from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models, shared # pylint: disable=unused-import
re_lora = re.compile(" 0 else 1.0
+ v = np.interp(step, m[1], m[0])
+ return v
+ else:
+ return m
+
+ stepwise = calculate_weight(sorted_positions(param), step, steps)
+ return stepwise
+
+
+def prompt(p):
+ if shared.opts.lora_apply_tags == 0:
+ return
+ all_tags = []
+ for loaded in networks.loaded_networks:
+ page = [en for en in shared.extra_networks if en.name == 'lora'][0]
+ item = page.create_item(loaded.name)
+ tags = (item or {}).get("tags", {})
+ loaded.tags = list(tags)
+ if len(loaded.tags) == 0:
+ loaded.tags.append(loaded.name)
+ if shared.opts.lora_apply_tags > 0:
+ loaded.tags = loaded.tags[:shared.opts.lora_apply_tags]
+ all_tags.extend(loaded.tags)
+ if len(all_tags) > 0:
+ shared.log.debug(f"Load network: type=LoRA tags={all_tags} max={shared.opts.lora_apply_tags} apply")
+ all_tags = ', '.join(all_tags)
+ p.extra_generation_params["LoRA tags"] = all_tags
+ if '_tags_' in p.prompt:
+ p.prompt = p.prompt.replace('_tags_', all_tags)
+ else:
+ p.prompt = f"{p.prompt}, {all_tags}"
+ if p.all_prompts is not None:
+ for i in range(len(p.all_prompts)):
+ if '_tags_' in p.all_prompts[i]:
+ p.all_prompts[i] = p.all_prompts[i].replace('_tags_', all_tags)
+ else:
+ p.all_prompts[i] = f"{p.all_prompts[i]}, {all_tags}"
+
+
+def infotext(p):
+ names = [i.name for i in networks.loaded_networks]
+ if len(names) > 0:
+ p.extra_generation_params["LoRA networks"] = ", ".join(names)
+ if shared.opts.lora_add_hashes_to_infotext:
+ network_hashes = []
+ for item in networks.loaded_networks:
+ if not item.network_on_disk.shorthash:
+ continue
+ network_hashes.append(item.network_on_disk.shorthash)
+ if len(network_hashes) > 0:
+ p.extra_generation_params["LoRA hashes"] = ", ".join(network_hashes)
+
+
+def parse(p, params_list, step=0):
+ names = []
+ te_multipliers = []
+ unet_multipliers = []
+ dyn_dims = []
+ for params in params_list:
+ assert params.items
+ names.append(params.positional[0])
+ te_multiplier = params.named.get("te", params.positional[1] if len(params.positional) > 1 else shared.opts.extra_networks_default_multiplier)
+ if isinstance(te_multiplier, str) and "@" in te_multiplier:
+ te_multiplier = get_stepwise(te_multiplier, step, p.steps)
+ else:
+ te_multiplier = float(te_multiplier)
+ unet_multiplier = [params.positional[2] if len(params.positional) > 2 else te_multiplier] * 3
+ unet_multiplier = [params.named.get("unet", unet_multiplier[0])] * 3
+ unet_multiplier[0] = params.named.get("in", unet_multiplier[0])
+ unet_multiplier[1] = params.named.get("mid", unet_multiplier[1])
+ unet_multiplier[2] = params.named.get("out", unet_multiplier[2])
+ for i in range(len(unet_multiplier)):
+ if isinstance(unet_multiplier[i], str) and "@" in unet_multiplier[i]:
+ unet_multiplier[i] = get_stepwise(unet_multiplier[i], step, p.steps)
+ else:
+ unet_multiplier[i] = float(unet_multiplier[i])
+ dyn_dim = int(params.positional[3]) if len(params.positional) > 3 else None
+ dyn_dim = int(params.named["dyn"]) if "dyn" in params.named else dyn_dim
+ te_multipliers.append(te_multiplier)
+ unet_multipliers.append(unet_multiplier)
+ dyn_dims.append(dyn_dim)
+ return names, te_multipliers, unet_multipliers, dyn_dims
+
+
+class ExtraNetworkLora(extra_networks.ExtraNetwork):
+
+ def __init__(self):
+ super().__init__('lora')
+ self.active = False
+ self.model = None
+ self.errors = {}
+
+ def activate(self, p, params_list, step=0):
+ t0 = time.time()
+ self.errors.clear()
+ if self.active:
+ if self.model != shared.opts.sd_model_checkpoint: # reset if model changed
+ self.active = False
+ if len(params_list) > 0 and not self.active: # activate patches once
+ shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
+ self.active = True
+ self.model = shared.opts.sd_model_checkpoint
+ names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
+ networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims)
+ t1 = time.time()
+ if len(networks.loaded_networks) > 0 and step == 0:
+ infotext(p)
+ prompt(p)
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} dims={dyn_dims} load={t1-t0:.2f}')
+
+ def deactivate(self, p):
+ t0 = time.time()
+ if shared.native and len(networks.diffuser_loaded) > 0:
+ if hasattr(shared.sd_model, "unload_lora_weights") and hasattr(shared.sd_model, "text_encoder"):
+ if not (shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled is True):
+ try:
+ if shared.opts.lora_fuse_diffusers:
+ shared.sd_model.unfuse_lora()
+ shared.sd_model.unload_lora_weights() # fails for non-CLIP models
+ except Exception:
+ pass
+ t1 = time.time()
+ networks.timer['restore'] += t1 - t0
+ if self.active and networks.debug:
+ shared.log.debug(f"Network end: type=LoRA load={networks.timer['load']:.2f} apply={networks.timer['apply']:.2f} restore={networks.timer['restore']:.2f}")
+ if self.errors:
+ for k, v in self.errors.items():
+ shared.log.error(f'LoRA: name="{k}" errors={v}')
+ self.errors.clear()
diff --git a/modules/lora/lora.py b/modules/lora/lora.py
new file mode 100644
index 000000000..33adfe05c
--- /dev/null
+++ b/modules/lora/lora.py
@@ -0,0 +1,8 @@
+# import networks
+#
+# list_available_loras = networks.list_available_networks
+# available_loras = networks.available_networks
+# available_lora_aliases = networks.available_network_aliases
+# available_lora_hash_lookup = networks.available_network_hash_lookup
+# forbidden_lora_aliases = networks.forbidden_network_aliases
+# loaded_loras = networks.loaded_networks
diff --git a/modules/lora/lora_convert.py b/modules/lora/lora_convert.py
new file mode 100644
index 000000000..6bf563125
--- /dev/null
+++ b/modules/lora/lora_convert.py
@@ -0,0 +1,477 @@
+import os
+import re
+import bisect
+from typing import Dict
+import torch
+from modules import shared
+
+
+debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+suffix_conversion = {
+ "attentions": {},
+ "resnets": {
+ "conv1": "in_layers_2",
+ "conv2": "out_layers_3",
+ "norm1": "in_layers_0",
+ "norm2": "out_layers_0",
+ "time_emb_proj": "emb_layers_1",
+ "conv_shortcut": "skip_connection",
+ }
+}
+re_digits = re.compile(r"\d+")
+re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
+re_compiled = {}
+
+
+def make_unet_conversion_map() -> Dict[str, str]:
+ unet_conversion_map_layer = []
+
+ for i in range(3): # num_blocks is 3 in sdxl
+ # loop over downblocks/upblocks
+ for j in range(2):
+ # loop over resnets/attentions for downblocks
+ hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
+ sd_down_res_prefix = f"input_blocks.{3 * i + j + 1}.0."
+ unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
+ if i < 3:
+ # no attention layers in down_blocks.3
+ hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
+ sd_down_atn_prefix = f"input_blocks.{3 * i + j + 1}.1."
+ unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
+
+ for j in range(3):
+ # loop over resnets/attentions for upblocks
+ hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
+ sd_up_res_prefix = f"output_blocks.{3 * i + j}.0."
+ unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
+ # if i > 0: commentout for sdxl
+ # no attention layers in up_blocks.0
+ hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
+ sd_up_atn_prefix = f"output_blocks.{3 * i + j}.1."
+ unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
+
+ if i < 3:
+ # no downsample in down_blocks.3
+ hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
+ sd_downsample_prefix = f"input_blocks.{3 * (i + 1)}.0.op."
+ unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
+ # no upsample in up_blocks.3
+ hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
+ sd_upsample_prefix = f"output_blocks.{3 * i + 2}.{2}." # change for sdxl
+ unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
+
+ hf_mid_atn_prefix = "mid_block.attentions.0."
+ sd_mid_atn_prefix = "middle_block.1."
+ unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
+
+ for j in range(2):
+ hf_mid_res_prefix = f"mid_block.resnets.{j}."
+ sd_mid_res_prefix = f"middle_block.{2 * j}."
+ unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
+
+ unet_conversion_map_resnet = [
+ # (stable-diffusion, HF Diffusers)
+ ("in_layers.0.", "norm1."),
+ ("in_layers.2.", "conv1."),
+ ("out_layers.0.", "norm2."),
+ ("out_layers.3.", "conv2."),
+ ("emb_layers.1.", "time_emb_proj."),
+ ("skip_connection.", "conv_shortcut."),
+ ]
+
+ unet_conversion_map = []
+ for sd, hf in unet_conversion_map_layer:
+ if "resnets" in hf:
+ for sd_res, hf_res in unet_conversion_map_resnet:
+ unet_conversion_map.append((sd + sd_res, hf + hf_res))
+ else:
+ unet_conversion_map.append((sd, hf))
+
+ for j in range(2):
+ hf_time_embed_prefix = f"time_embedding.linear_{j + 1}."
+ sd_time_embed_prefix = f"time_embed.{j * 2}."
+ unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
+
+ for j in range(2):
+ hf_label_embed_prefix = f"add_embedding.linear_{j + 1}."
+ sd_label_embed_prefix = f"label_emb.0.{j * 2}."
+ unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
+
+ unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
+ unet_conversion_map.append(("out.0.", "conv_norm_out."))
+ unet_conversion_map.append(("out.2.", "conv_out."))
+
+ sd_hf_conversion_map = {sd.replace(".", "_")[:-1]: hf.replace(".", "_")[:-1] for sd, hf in unet_conversion_map}
+ return sd_hf_conversion_map
+
+
+class KeyConvert:
+ def __init__(self):
+ self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
+ self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
+ self.LORA_PREFIX_UNET = "lora_unet_"
+ self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
+ self.OFT_PREFIX_UNET = "oft_unet_"
+        # SDXL: must start with LORA_PREFIX_TEXT_ENCODER
+ self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
+ self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"
+
+ def __call__(self, key):
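+        # normalize a raw LoRA key into the model's network_layer_mapping key;
+        # returns (key, module) where module is None if no mapping entry matched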
+ if self.is_sdxl:
+ if "diffusion_model" in key: # Fix NTC Slider naming error
+ key = key.replace("diffusion_model", "lora_unet")
+ map_keys = list(self.UNET_CONVERSION_MAP.keys()) # prefix of U-Net modules
+ map_keys.sort()
+ search_key = key.replace(self.LORA_PREFIX_UNET, "").replace(self.OFT_PREFIX_UNET, "").replace(self.LORA_PREFIX_TEXT_ENCODER1, "").replace(self.LORA_PREFIX_TEXT_ENCODER2, "")
+ position = bisect.bisect_right(map_keys, search_key)
+ map_key = map_keys[position - 1]
+ if search_key.startswith(map_key):
+ key = key.replace(map_key, self.UNET_CONVERSION_MAP[map_key]).replace("oft", "lora") # pylint: disable=unsubscriptable-object
+ if "lycoris" in key and "transformer" in key:
+ key = key.replace("lycoris", "lora_transformer")
+ sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+ if sd_module is None:
+ sd_module = shared.sd_model.network_layer_mapping.get(key.replace("guidance", "timestep"), None) # FLUX1 fix
+ if debug and sd_module is None:
+ raise RuntimeError(f"LoRA key not found in network_layer_mapping: key={key} mapping={shared.sd_model.network_layer_mapping.keys()}")
+ return key, sd_module
+
+
+# Taken from https://github.com/huggingface/diffusers/blob/main/src/diffusers/loaders/lora_conversion_utils.py
+# Modified from 'lora_A' and 'lora_B' to 'lora_down' and 'lora_up'
+# Added early exit
+# The utilities under `_convert_kohya_flux_lora_to_diffusers()`
+# are taken from https://github.com/kohya-ss/sd-scripts/blob/a61cf73a5cb5209c3f4d1a3688dd276a4dfd1ecb/networks/convert_flux_lora.py
+# All credits go to `kohya-ss`.
+def _convert_to_ai_toolkit(sds_sd, ait_sd, sds_key, ait_key):
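+    # move a single kohya-style down/up pair into the diffusers (ai-toolkit) naming scheme,
+    # folding the alpha/rank scale into the weights so no separate alpha key is kept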
+ if sds_key + ".lora_down.weight" not in sds_sd:
+ return
+ down_weight = sds_sd.pop(sds_key + ".lora_down.weight")
+
+ # scale weight by alpha and dim
+ rank = down_weight.shape[0]
+ alpha = sds_sd.pop(sds_key + ".alpha").item() # alpha is scalar
+ scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here
+
+ # calculate scale_down and scale_up to keep the same value. if scale is 4, scale_down is 2 and scale_up is 2
+ scale_down = scale
+ scale_up = 1.0
+ while scale_down * 2 < scale_up:
+ scale_down *= 2
+ scale_up /= 2
+
+ ait_sd[ait_key + ".lora_down.weight"] = down_weight * scale_down
+ ait_sd[ait_key + ".lora_up.weight"] = sds_sd.pop(sds_key + ".lora_up.weight") * scale_up
+
+def _convert_to_ai_toolkit_cat(sds_sd, ait_sd, sds_key, ait_keys, dims=None):
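+    # split a fused LoRA pair (e.g. qkv) into one down/up pair per target key:
+    # lora_down is duplicated (or chunked when the up weight is block-sparse) and lora_up is split along dim 0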
+ if sds_key + ".lora_down.weight" not in sds_sd:
+ return
+ down_weight = sds_sd.pop(sds_key + ".lora_down.weight")
+ up_weight = sds_sd.pop(sds_key + ".lora_up.weight")
+ sd_lora_rank = down_weight.shape[0]
+
+ # scale weight by alpha and dim
+ alpha = sds_sd.pop(sds_key + ".alpha")
+ scale = alpha / sd_lora_rank
+
+ # calculate scale_down and scale_up
+ scale_down = scale
+ scale_up = 1.0
+ while scale_down * 2 < scale_up:
+ scale_down *= 2
+ scale_up /= 2
+
+ down_weight = down_weight * scale_down
+ up_weight = up_weight * scale_up
+
+ # calculate dims if not provided
+ num_splits = len(ait_keys)
+ if dims is None:
+ dims = [up_weight.shape[0] // num_splits] * num_splits
+ else:
+ assert sum(dims) == up_weight.shape[0]
+
+ # check upweight is sparse or not
+ is_sparse = False
+ if sd_lora_rank % num_splits == 0:
+ ait_rank = sd_lora_rank // num_splits
+ is_sparse = True
+ i = 0
+ for j in range(len(dims)):
+ for k in range(len(dims)):
+ if j == k:
+ continue
+ is_sparse = is_sparse and torch.all(
+ up_weight[i : i + dims[j], k * ait_rank : (k + 1) * ait_rank] == 0
+ )
+ i += dims[j]
+ # if is_sparse:
+ # print(f"weight is sparse: {sds_key}")
+
+ # make ai-toolkit weight
+ ait_down_keys = [k + ".lora_down.weight" for k in ait_keys]
+ ait_up_keys = [k + ".lora_up.weight" for k in ait_keys]
+ if not is_sparse:
+ # down_weight is copied to each split
+ ait_sd.update({k: down_weight for k in ait_down_keys})
+
+ # up_weight is split to each split
+ ait_sd.update({k: v for k, v in zip(ait_up_keys, torch.split(up_weight, dims, dim=0))}) # noqa: C416 # pylint: disable=unnecessary-comprehension
+ else:
+ # down_weight is chunked to each split
+ ait_sd.update({k: v for k, v in zip(ait_down_keys, torch.chunk(down_weight, num_splits, dim=0))}) # noqa: C416 # pylint: disable=unnecessary-comprehension
+
+ # up_weight is sparse: only non-zero values are copied to each split
+ i = 0
+ for j in range(len(dims)):
+ ait_sd[ait_up_keys[j]] = up_weight[i : i + dims[j], j * ait_rank : (j + 1) * ait_rank].contiguous()
+ i += dims[j]
+
+def _convert_text_encoder_lora_key(key, lora_name):
+ """
+ Converts a text encoder LoRA key to a Diffusers compatible key.
+ """
+ if lora_name.startswith(("lora_te_", "lora_te1_")):
+ key_to_replace = "lora_te_" if lora_name.startswith("lora_te_") else "lora_te1_"
+ else:
+ key_to_replace = "lora_te2_"
+
+ diffusers_name = key.replace(key_to_replace, "").replace("_", ".")
+ diffusers_name = diffusers_name.replace("text.model", "text_model")
+ diffusers_name = diffusers_name.replace("self.attn", "self_attn")
+ diffusers_name = diffusers_name.replace("q.proj.lora", "to_q_lora")
+ diffusers_name = diffusers_name.replace("k.proj.lora", "to_k_lora")
+ diffusers_name = diffusers_name.replace("v.proj.lora", "to_v_lora")
+ diffusers_name = diffusers_name.replace("out.proj.lora", "to_out_lora")
+ diffusers_name = diffusers_name.replace("text.projection", "text_projection")
+
+ if "self_attn" in diffusers_name or "text_projection" in diffusers_name:
+ pass
+ elif "mlp" in diffusers_name:
+ # Be aware that this is the new diffusers convention and the rest of the code might
+ # not utilize it yet.
+ diffusers_name = diffusers_name.replace(".lora.", ".lora_linear_layer.")
+ return diffusers_name
+
+def _convert_kohya_flux_lora_to_diffusers(state_dict):
+ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
+ ait_sd = {}
+ for i in range(19):
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_out.0",
+ )
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.to_q",
+ f"transformer.transformer_blocks.{i}.attn.to_k",
+ f"transformer.transformer_blocks.{i}.attn.to_v",
+ ],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_mlp_0",
+ f"transformer.transformer_blocks.{i}.ff.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_mlp_2",
+ f"transformer.transformer_blocks.{i}.ff.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_mod_lin",
+ f"transformer.transformer_blocks.{i}.norm1.linear",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_add_out",
+ )
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.add_q_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_k_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_v_proj",
+ ],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_mlp_0",
+ f"transformer.transformer_blocks.{i}.ff_context.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_mlp_2",
+ f"transformer.transformer_blocks.{i}.ff_context.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_mod_lin",
+ f"transformer.transformer_blocks.{i}.norm1_context.linear",
+ )
+
+ for i in range(38):
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_single_blocks_{i}_linear1",
+ [
+ f"transformer.single_transformer_blocks.{i}.attn.to_q",
+ f"transformer.single_transformer_blocks.{i}.attn.to_k",
+ f"transformer.single_transformer_blocks.{i}.attn.to_v",
+ f"transformer.single_transformer_blocks.{i}.proj_mlp",
+ ],
+ dims=[3072, 3072, 3072, 12288],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_single_blocks_{i}_linear2",
+ f"transformer.single_transformer_blocks.{i}.proj_out",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_single_blocks_{i}_modulation_lin",
+ f"transformer.single_transformer_blocks.{i}.norm.linear",
+ )
+
+ if len(sds_sd) > 0:
+ return None
+
+ return ait_sd
+
+ return _convert_sd_scripts_to_ai_toolkit(state_dict)
+
+def _convert_kohya_sd3_lora_to_diffusers(state_dict):
+ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
+ ait_sd = {}
+ for i in range(38):
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.to_q",
+ f"transformer.transformer_blocks.{i}.attn.to_k",
+ f"transformer.transformer_blocks.{i}.attn.to_v",
+ ],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_mlp_fc1",
+ f"transformer.transformer_blocks.{i}.ff_context.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_mlp_fc2",
+ f"transformer.transformer_blocks.{i}.ff_context.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_mlp_fc1",
+ f"transformer.transformer_blocks.{i}.ff.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_mlp_fc2",
+ f"transformer.transformer_blocks.{i}.ff.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_adaLN_modulation_1",
+ f"transformer.transformer_blocks.{i}.norm1_context.linear",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_adaLN_modulation_1",
+ f"transformer.transformer_blocks.{i}.norm1.linear",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_add_out",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_out_0",
+ )
+
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.add_q_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_k_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_v_proj",
+ ],
+ )
+ remaining_keys = list(sds_sd.keys())
+ te_state_dict = {}
+ if remaining_keys:
+ if not all(k.startswith("lora_te1") for k in remaining_keys):
+ raise ValueError(f"Incompatible keys detected: \n\n {', '.join(remaining_keys)}")
+ for key in remaining_keys:
+ if not key.endswith("lora_down.weight"):
+ continue
+
+ lora_name = key.split(".")[0]
+ lora_name_up = f"{lora_name}.lora_up.weight"
+ lora_name_alpha = f"{lora_name}.alpha"
+ diffusers_name = _convert_text_encoder_lora_key(key, lora_name)
+
+ if lora_name.startswith(("lora_te_", "lora_te1_")):
+ down_weight = sds_sd.pop(key)
+ sd_lora_rank = down_weight.shape[0]
+ te_state_dict[diffusers_name] = down_weight
+ te_state_dict[diffusers_name.replace(".down.", ".up.")] = sds_sd.pop(lora_name_up)
+
+ if lora_name_alpha in sds_sd:
+ alpha = sds_sd.pop(lora_name_alpha).item()
+ scale = alpha / sd_lora_rank
+
+ scale_down = scale
+ scale_up = 1.0
+ while scale_down * 2 < scale_up:
+ scale_down *= 2
+ scale_up /= 2
+
+ te_state_dict[diffusers_name] *= scale_down
+ te_state_dict[diffusers_name.replace(".down.", ".up.")] *= scale_up
+
+ if len(sds_sd) > 0:
+ print(f"Unsupported keys for ai-toolkit: {sds_sd.keys()}")
+
+ if te_state_dict:
+ te_state_dict = {f"text_encoder.{module_name}": params for module_name, params in te_state_dict.items()}
+
+ new_state_dict = {**ait_sd, **te_state_dict}
+ return new_state_dict
+
+ return _convert_sd_scripts_to_ai_toolkit(state_dict)
diff --git a/modules/lora/lora_extract.py b/modules/lora/lora_extract.py
new file mode 100644
index 000000000..c2e0a275b
--- /dev/null
+++ b/modules/lora/lora_extract.py
@@ -0,0 +1,271 @@
+import os
+import time
+import json
+import datetime
+import torch
+from safetensors.torch import save_file
+import gradio as gr
+from rich import progress as p
+from modules import shared, devices
+from modules.ui_common import create_refresh_button
+from modules.call_queue import wrap_gradio_gpu_call
+
+
+class SVDHandler:
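+    # decomposes the difference between a module's current weight and its pre-LoRA backup via low-rank SVD
+    # and re-emits it as lora_up / lora_down / alpha tensors for the extracted network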
+ def __init__(self, maxrank=0, rank_ratio=1):
+ self.network_name: str = None
+ self.U: torch.Tensor = None
+ self.S: torch.Tensor = None
+ self.Vh: torch.Tensor = None
+ self.maxrank: int = maxrank
+ self.rank_ratio: float = rank_ratio
+ self.rank: int = 0
+ self.out_size: int = None
+ self.in_size: int = None
+ self.kernel_size: tuple[int, int] = None
+ self.conv2d: bool = False
+
+ def decompose(self, weight, backupweight):
+ self.conv2d = len(weight.size()) == 4
+ self.kernel_size = None if not self.conv2d else weight.size()[2:4]
+ self.out_size, self.in_size = weight.size()[0:2]
+ diffweight = weight.clone().to(devices.device)
+ diffweight -= backupweight.to(devices.device)
+ if self.conv2d:
+ if self.conv2d and self.kernel_size != (1, 1):
+ diffweight = diffweight.flatten(start_dim=1)
+ else:
+ diffweight = diffweight.squeeze()
+ self.U, self.S, self.Vh = torch.svd_lowrank(diffweight.to(device=devices.device, dtype=torch.float), self.maxrank, 2)
+ # del diffweight
+ self.U = self.U.to(device=devices.cpu, dtype=torch.bfloat16)
+ self.S = self.S.to(device=devices.cpu, dtype=torch.bfloat16)
+ self.Vh = self.Vh.t().to(device=devices.cpu, dtype=torch.bfloat16) # svd_lowrank outputs a transposed matrix
+
+ def findrank(self):
+ if self.rank_ratio < 1:
+ S_squared = self.S.pow(2)
+ S_fro_sq = float(torch.sum(S_squared))
+ sum_S_squared = torch.cumsum(S_squared, dim=0) / S_fro_sq
+ index = int(torch.searchsorted(sum_S_squared, self.rank_ratio ** 2)) + 1
+ index = max(1, min(index, len(self.S) - 1))
+ self.rank = index
+ if self.maxrank > 0:
+ self.rank = min(self.rank, self.maxrank)
+ else:
+ self.rank = min(self.in_size, self.out_size, self.maxrank)
+
+ def makeweights(self):
+ self.findrank()
+ up = self.U[:, :self.rank] @ torch.diag(self.S[:self.rank])
+ down = self.Vh[:self.rank, :]
+ if self.conv2d and self.kernel_size is not None:
+ up = up.reshape(self.out_size, self.rank, 1, 1)
+ down = down.reshape(self.rank, self.in_size, self.kernel_size[0], self.kernel_size[1]) # pylint: disable=unsubscriptable-object
+ return_dict = {f'{self.network_name}.lora_up.weight': up.contiguous(),
+ f'{self.network_name}.lora_down.weight': down.contiguous(),
+ f'{self.network_name}.alpha': torch.tensor(down.shape[0]),
+ }
+ return return_dict
+
+
+def loaded_lora():
+ if not shared.sd_loaded:
+ return ""
+ loaded = set()
+ if hasattr(shared.sd_model, 'unet'):
+ for _name, module in shared.sd_model.unet.named_modules():
+ current = getattr(module, "network_current_names", None)
+ if current is not None:
+ current = [item[0] for item in current]
+ loaded.update(current)
+ return list(loaded)
+
+
+def loaded_lora_str():
+ return ", ".join(loaded_lora())
+
+
+def make_meta(fn, maxrank, rank_ratio):
+ meta = {
+ "model_spec.sai_model_spec": "1.0.0",
+ "model_spec.title": os.path.splitext(os.path.basename(fn))[0],
+ "model_spec.author": "SD.Next",
+ "model_spec.implementation": "https://github.com/vladmandic/automatic",
+ "model_spec.date": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat(),
+ "model_spec.base_model": shared.opts.sd_model_checkpoint,
+ "model_spec.dtype": str(devices.dtype),
+ "model_spec.base_lora": json.dumps(loaded_lora()),
+ "model_spec.config": f"maxrank={maxrank} rank_ratio={rank_ratio}",
+ }
+ if shared.sd_model_type == "sdxl":
+ meta["model_spec.architecture"] = "stable-diffusion-xl-v1-base/lora" # sai standard
+ meta["ss_base_model_version"] = "sdxl_base_v1-0" # kohya standard
+ elif shared.sd_model_type == "sd":
+ meta["model_spec.architecture"] = "stable-diffusion-v1/lora"
+ meta["ss_base_model_version"] = "sd_v1"
+ elif shared.sd_model_type == "f1":
+ meta["model_spec.architecture"] = "flux-1-dev/lora"
+ meta["ss_base_model_version"] = "flux1"
+ elif shared.sd_model_type == "sc":
+ meta["model_spec.architecture"] = "stable-cascade-v1-prior/lora"
+ return meta
+
+
+def make_lora(fn, maxrank, auto_rank, rank_ratio, modules, overwrite):
+ if not shared.sd_loaded or not shared.native:
+ msg = "LoRA extract: model not loaded"
+ shared.log.warning(msg)
+ yield msg
+ return
+    if len(loaded_lora()) == 0:
+ msg = "LoRA extract: no LoRA detected"
+ shared.log.warning(msg)
+ yield msg
+ return
+ if not fn:
+ msg = "LoRA extract: target filename required"
+ shared.log.warning(msg)
+ yield msg
+ return
+ t0 = time.time()
+ maxrank = int(maxrank)
+ rank_ratio = 1 if not auto_rank else rank_ratio
+ shared.log.debug(f'LoRA extract: modules={modules} maxrank={maxrank} auto={auto_rank} ratio={rank_ratio} fn="{fn}"')
+ shared.state.begin('LoRA extract')
+
+ with p.Progress(p.TextColumn('[cyan]LoRA extract'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), p.TextColumn('[cyan]{task.description}'), console=shared.console) as progress:
+
+ if 'te' in modules and getattr(shared.sd_model, 'text_encoder', None) is not None:
+            te1_modules = list(shared.sd_model.text_encoder.named_modules())
+            task = progress.add_task(description="te1 decompose", total=len(te1_modules))
+ for name, module in shared.sd_model.text_encoder.named_modules():
+ progress.update(task, advance=1)
+ weights_backup = getattr(module, "network_weights_backup", None)
+ if weights_backup is None or getattr(module, "network_current_names", None) is None:
+ continue
+ prefix = "lora_te1_" if hasattr(shared.sd_model, 'text_encoder_2') else "lora_te_"
+ module.svdhandler = SVDHandler(maxrank, rank_ratio)
+ module.svdhandler.network_name = prefix + name.replace(".", "_")
+ with devices.inference_context():
+ module.svdhandler.decompose(module.weight, weights_backup)
+ progress.remove_task(task)
+ t1 = time.time()
+
+ if 'te' in modules and getattr(shared.sd_model, 'text_encoder_2', None) is not None:
+            te2_modules = list(shared.sd_model.text_encoder_2.named_modules())
+            task = progress.add_task(description="te2 decompose", total=len(te2_modules))
+ for name, module in shared.sd_model.text_encoder_2.named_modules():
+ progress.update(task, advance=1)
+ weights_backup = getattr(module, "network_weights_backup", None)
+ if weights_backup is None or getattr(module, "network_current_names", None) is None:
+ continue
+ module.svdhandler = SVDHandler(maxrank, rank_ratio)
+ module.svdhandler.network_name = "lora_te2_" + name.replace(".", "_")
+ with devices.inference_context():
+ module.svdhandler.decompose(module.weight, weights_backup)
+ progress.remove_task(task)
+ t2 = time.time()
+
+ if 'unet' in modules and getattr(shared.sd_model, 'unet', None) is not None:
+            unet_modules = list(shared.sd_model.unet.named_modules())
+            task = progress.add_task(description="unet decompose", total=len(unet_modules))
+ for name, module in shared.sd_model.unet.named_modules():
+ progress.update(task, advance=1)
+ weights_backup = getattr(module, "network_weights_backup", None)
+ if weights_backup is None or getattr(module, "network_current_names", None) is None:
+ continue
+ module.svdhandler = SVDHandler(maxrank, rank_ratio)
+ module.svdhandler.network_name = "lora_unet_" + name.replace(".", "_")
+ with devices.inference_context():
+ module.svdhandler.decompose(module.weight, weights_backup)
+ progress.remove_task(task)
+ t3 = time.time()
+
+ # TODO: Handle quant for Flux
+ # if 'te' in modules and getattr(shared.sd_model, 'transformer', None) is not None:
+ # for name, module in shared.sd_model.transformer.named_modules():
+ # if "norm" in name and "linear" not in name:
+ # continue
+ # weights_backup = getattr(module, "network_weights_backup", None)
+ # if weights_backup is None:
+ # continue
+ # module.svdhandler = SVDHandler()
+ # module.svdhandler.network_name = "lora_transformer_" + name.replace(".", "_")
+ # module.svdhandler.decompose(module.weight, weights_backup)
+ # module.svdhandler.findrank(rank, rank_ratio)
+
+ lora_state_dict = {}
+ for sub in ['text_encoder', 'text_encoder_2', 'unet', 'transformer']:
+ submodel = getattr(shared.sd_model, sub, None)
+ if submodel is not None:
+                sub_modules = list(submodel.named_modules())
+                task = progress.add_task(description=f"{sub} extract", total=len(sub_modules))
+ for _name, module in submodel.named_modules():
+ progress.update(task, advance=1)
+ if not hasattr(module, "svdhandler"):
+ continue
+ lora_state_dict.update(module.svdhandler.makeweights())
+ del module.svdhandler
+ progress.remove_task(task)
+ t4 = time.time()
+
+ if not os.path.isabs(fn):
+ fn = os.path.join(shared.cmd_opts.lora_dir, fn)
+ if not fn.endswith('.safetensors'):
+ fn += '.safetensors'
+ if os.path.exists(fn):
+ if overwrite:
+ os.remove(fn)
+ else:
+ msg = f'LoRA extract: fn="{fn}" file exists'
+ shared.log.warning(msg)
+ yield msg
+ return
+
+ shared.state.end()
+ meta = make_meta(fn, maxrank, rank_ratio)
+ shared.log.debug(f'LoRA metadata: {meta}')
+ try:
+ save_file(tensors=lora_state_dict, metadata=meta, filename=fn)
+ except Exception as e:
+ msg = f'LoRA extract error: fn="{fn}" {e}'
+ shared.log.error(msg)
+ yield msg
+ return
+ t5 = time.time()
+ shared.log.debug(f'LoRA extract: time={t5-t0:.2f} te1={t1-t0:.2f} te2={t2-t1:.2f} unet={t3-t2:.2f} save={t5-t4:.2f}')
+ keys = list(lora_state_dict.keys())
+ msg = f'LoRA extract: fn="{fn}" keys={len(keys)}'
+ shared.log.info(msg)
+ yield msg
+
+
+def create_ui():
+ def gr_show(visible=True):
+ return {"visible": visible, "__type__": "update"}
+
+ with gr.Tab(label="Extract LoRA"):
+ with gr.Row():
+ loaded = gr.Textbox(placeholder="Press refresh to query loaded LoRA", label="Loaded LoRA", interactive=False)
+ create_refresh_button(loaded, lambda: None, lambda: {'value': loaded_lora_str()}, "testid")
+ with gr.Group():
+ with gr.Row():
+ modules = gr.CheckboxGroup(label="Modules to extract", value=['unet'], choices=['te', 'unet'])
+ with gr.Row():
+ auto_rank = gr.Checkbox(value=False, label="Automatically determine rank")
+ rank_ratio = gr.Slider(label="Autorank ratio", value=1, minimum=0, maximum=1, step=0.05, visible=False)
+ rank = gr.Slider(label="Maximum rank", value=32, minimum=1, maximum=256)
+ with gr.Row():
+ filename = gr.Textbox(label="LoRA target filename")
+ overwrite = gr.Checkbox(value=False, label="Overwrite existing file")
+ with gr.Row():
+ extract = gr.Button(value="Extract LoRA", variant='primary')
+ status = gr.HTML(value="", show_label=False)
+
+ auto_rank.change(fn=lambda x: gr_show(x), inputs=[auto_rank], outputs=[rank_ratio])
+ extract.click(
+ fn=wrap_gradio_gpu_call(make_lora, extra_outputs=[]),
+ inputs=[filename, rank, auto_rank, rank_ratio, modules, overwrite],
+ outputs=[status]
+ )
diff --git a/modules/lora/lyco_helpers.py b/modules/lora/lyco_helpers.py
new file mode 100644
index 000000000..9a16d25ab
--- /dev/null
+++ b/modules/lora/lyco_helpers.py
@@ -0,0 +1,66 @@
+import torch
+
+
+def make_weight_cp(t, wa, wb):
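+    # rebuild a full (conv) weight from a CP/Tucker decomposition: core tensor t is contracted with factor wb, then wa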
+ temp = torch.einsum('i j k l, j r -> i r k l', t, wb)
+ return torch.einsum('i j k l, i r -> r j k l', temp, wa)
+
+
+def rebuild_conventional(up, down, shape, dyn_dim=None):
+ up = up.reshape(up.size(0), -1)
+ down = down.reshape(down.size(0), -1)
+ if dyn_dim is not None:
+ up = up[:, :dyn_dim]
+ down = down[:dyn_dim, :]
+ return (up @ down).reshape(shape)
+
+
+def rebuild_cp_decomposition(up, down, mid):
+ up = up.reshape(up.size(0), -1)
+ down = down.reshape(down.size(0), -1)
+ return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)
+
+
+# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py
+def factorization(dimension: int, factor:int=-1) -> tuple[int, int]:
+ """
+    return a tuple of two values that decompose the input dimension, using the number closest to factor;
+    the second value is greater than or equal to the first.
+
+    In LoRA with Kronecker product, the first value is used for the weight scale,
+    the second value for the weight itself.
+
+    Because the Kronecker product is non-commutative, A⊗B ≠ B⊗A, so the meaning of the two matrices differs slightly.
+
+ examples
+ factor
+ -1 2 4 8 16 ...
+ 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127
+ 128 -> 8, 16 128 -> 2, 64 128 -> 4, 32 128 -> 8, 16 128 -> 8, 16
+ 250 -> 10, 25 250 -> 2, 125 250 -> 2, 125 250 -> 5, 50 250 -> 10, 25
+ 360 -> 8, 45 360 -> 2, 180 360 -> 4, 90 360 -> 8, 45 360 -> 12, 30
+ 512 -> 16, 32 512 -> 2, 256 512 -> 4, 128 512 -> 8, 64 512 -> 16, 32
+ 1024 -> 32, 32 1024 -> 2, 512 1024 -> 4, 256 1024 -> 8, 128 1024 -> 16, 64
+ """
+
+ if factor > 0 and (dimension % factor) == 0:
+ m = factor
+ n = dimension // factor
+ if m > n:
+ n, m = m, n
+ return m, n
+ if factor < 0:
+ factor = dimension
+ m, n = 1, dimension
+ length = m + n
+    while m < n:
+        new_m = m + 1
+        while dimension % new_m != 0:
+            new_m += 1
+        new_n = dimension // new_m
+        if new_m + new_n > length or new_m > factor:
+ break
+ m, n = new_m, new_n
+ if m > n:
+ n, m = m, n
+ return m, n
diff --git a/modules/lora/network.py b/modules/lora/network.py
new file mode 100644
index 000000000..0785ef9f4
--- /dev/null
+++ b/modules/lora/network.py
@@ -0,0 +1,187 @@
+import os
+from collections import namedtuple
+import enum
+
+from modules import sd_models, hashes, shared
+
+NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
+metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
+
+
+class SdVersion(enum.Enum):
+ Unknown = 1
+ SD1 = 2
+ SD2 = 3
+    SD3 = 4
+    SDXL = 5
+    SC = 6
+    F1 = 7
+
+
+class NetworkOnDisk:
+ def __init__(self, name, filename):
+ self.shorthash = None
+ self.hash = None
+ self.name = name
+ self.filename = filename
+ if filename.startswith(shared.cmd_opts.lora_dir):
+ self.fullname = os.path.splitext(filename[len(shared.cmd_opts.lora_dir):].strip("/"))[0]
+ else:
+ self.fullname = name
+ self.metadata = {}
+ self.is_safetensors = os.path.splitext(filename)[1].lower() == ".safetensors"
+ if self.is_safetensors:
+ self.metadata = sd_models.read_metadata_from_safetensors(filename)
+ if self.metadata:
+ m = {}
+ for k, v in sorted(self.metadata.items(), key=lambda x: metadata_tags_order.get(x[0], 999)):
+ m[k] = v
+ self.metadata = m
+ self.alias = self.metadata.get('ss_output_name', self.name)
+ sha256 = hashes.sha256_from_cache(self.filename, "lora/" + self.name) or hashes.sha256_from_cache(self.filename, "lora/" + self.name, use_addnet_hash=True) or self.metadata.get('sshs_model_hash')
+ self.set_hash(sha256)
+ self.sd_version = self.detect_version()
+
+ def detect_version(self):
+ base = str(self.metadata.get('ss_base_model_version', "")).lower()
+ arch = str(self.metadata.get('modelspec.architecture', "")).lower()
+ if base.startswith("sd_v1"):
+ return 'sd1'
+ if base.startswith("sdxl"):
+ return 'xl'
+ if base.startswith("stable_cascade"):
+ return 'sc'
+ if base.startswith("sd3"):
+ return 'sd3'
+ if base.startswith("flux"):
+ return 'f1'
+
+ if arch.startswith("stable-diffusion-v1"):
+ return 'sd1'
+ if arch.startswith("stable-diffusion-xl"):
+ return 'xl'
+ if arch.startswith("stable-cascade"):
+ return 'sc'
+ if arch.startswith("flux"):
+ return 'f1'
+
+ if "v1-5" in str(self.metadata.get('ss_sd_model_name', "")):
+ return 'sd1'
+ if str(self.metadata.get('ss_v2', "")) == "True":
+ return 'sd2'
+ if 'flux' in self.name.lower():
+ return 'f1'
+ if 'xl' in self.name.lower():
+ return 'xl'
+
+ return ''
+
+ def set_hash(self, v):
+ self.hash = v or ''
+ self.shorthash = self.hash[0:8]
+
+ def read_hash(self):
+ if not self.hash:
+ self.set_hash(hashes.sha256(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or '')
+
+ def get_alias(self):
+ import modules.lora.networks as networks
+ return self.name if shared.opts.lora_preferred_name == "filename" or self.alias.lower() in networks.forbidden_network_aliases else self.alias
+
+
+class Network: # LoraModule
+ def __init__(self, name, network_on_disk: NetworkOnDisk):
+ self.name = name
+ self.network_on_disk = network_on_disk
+ self.te_multiplier = 1.0
+ self.unet_multiplier = [1.0] * 3
+ self.dyn_dim = None
+ self.modules = {}
+ self.bundle_embeddings = {}
+ self.mtime = None
+        self.mentioned_name = None
+        """the text that was used to add the network to prompt - can be either name or an alias"""
+        self.tags = None
+
+
+class ModuleType:
+ def create_module(self, net: Network, weights: NetworkWeights) -> Network | None: # pylint: disable=W0613
+ return None
+
+
+class NetworkModule:
+ def __init__(self, net: Network, weights: NetworkWeights):
+ self.network = net
+ self.network_key = weights.network_key
+ self.sd_key = weights.sd_key
+ self.sd_module = weights.sd_module
+ if hasattr(self.sd_module, 'weight'):
+ self.shape = self.sd_module.weight.shape
+ self.dim = None
+ self.bias = weights.w.get("bias")
+ self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
+ self.scale = weights.w["scale"].item() if "scale" in weights.w else None
+ self.dora_scale = weights.w.get("dora_scale", None)
+ self.dora_norm_dims = len(self.shape) - 1
+
+ def multiplier(self):
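+        # pick the per-block UNet multiplier (down/mid/up blocks); keys starting with 'lora_transformer'
+        # use te_multiplier, anything unmatched falls back to the first UNet multiplier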
+ unet_multiplier = 3 * [self.network.unet_multiplier] if not isinstance(self.network.unet_multiplier, list) else self.network.unet_multiplier
+ if 'transformer' in self.sd_key[:20]:
+ return self.network.te_multiplier
+ if "down_blocks" in self.sd_key:
+ return unet_multiplier[0]
+ if "mid_block" in self.sd_key:
+ return unet_multiplier[1]
+ if "up_blocks" in self.sd_key:
+ return unet_multiplier[2]
+ else:
+ return unet_multiplier[0]
+
+ def calc_scale(self):
+ if self.scale is not None:
+ return self.scale
+ if self.dim is not None and self.alpha is not None:
+ return self.alpha / self.dim
+ return 1.0
+
+ def apply_weight_decompose(self, updown, orig_weight):
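+        # DoRA-style weight decomposition: renormalize the merged weight (updown + original) per channel
+        # using the stored dora_scale, then return the difference from the original weight as the update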
+ # Match the device/dtype
+ orig_weight = orig_weight.to(updown.dtype)
+ dora_scale = self.dora_scale.to(device=orig_weight.device, dtype=updown.dtype)
+ updown = updown.to(orig_weight.device)
+
+ merged_scale1 = updown + orig_weight
+ merged_scale1_norm = (
+ merged_scale1.transpose(0, 1)
+ .reshape(merged_scale1.shape[1], -1)
+ .norm(dim=1, keepdim=True)
+ .reshape(merged_scale1.shape[1], *[1] * self.dora_norm_dims)
+ .transpose(0, 1)
+ )
+
+ dora_merged = (
+ merged_scale1 * (dora_scale / merged_scale1_norm)
+ )
+ final_updown = dora_merged - orig_weight
+ return final_updown
+
+ def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
+ if self.bias is not None:
+ updown = updown.reshape(self.bias.shape)
+ updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
+ updown = updown.reshape(output_shape)
+ if len(output_shape) == 4:
+ updown = updown.reshape(output_shape)
+ if orig_weight.size().numel() == updown.size().numel():
+ updown = updown.reshape(orig_weight.shape)
+ if ex_bias is not None:
+ ex_bias = ex_bias * self.multiplier()
+ if self.dora_scale is not None:
+ updown = self.apply_weight_decompose(updown, orig_weight)
+ return updown * self.calc_scale() * self.multiplier(), ex_bias
+
+ def calc_updown(self, target):
+ raise NotImplementedError
+
+ def forward(self, x, y):
+ raise NotImplementedError
diff --git a/modules/lora/network_full.py b/modules/lora/network_full.py
new file mode 100644
index 000000000..5eb0b2e4e
--- /dev/null
+++ b/modules/lora/network_full.py
@@ -0,0 +1,26 @@
+import modules.lora.network as network
+
+
+class ModuleTypeFull(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["diff"]):
+ return NetworkModuleFull(net, weights)
+ return None
+
+
+class NetworkModuleFull(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+
+ self.weight = weights.w.get("diff")
+ self.ex_bias = weights.w.get("diff_b")
+
+ def calc_updown(self, target):
+ output_shape = self.weight.shape
+ updown = self.weight.to(target.device, dtype=target.dtype)
+ if self.ex_bias is not None:
+ ex_bias = self.ex_bias.to(target.device, dtype=target.dtype)
+ else:
+ ex_bias = None
+
+ return self.finalize_updown(updown, target, output_shape, ex_bias)
diff --git a/modules/lora/network_glora.py b/modules/lora/network_glora.py
new file mode 100644
index 000000000..ffcb25986
--- /dev/null
+++ b/modules/lora/network_glora.py
@@ -0,0 +1,30 @@
+import modules.lora.network as network
+
+
+class ModuleTypeGLora(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["a1.weight", "a2.weight", "alpha", "b1.weight", "b2.weight"]):
+ return NetworkModuleGLora(net, weights)
+ return None
+
+# adapted from https://github.com/KohakuBlueleaf/LyCORIS
+class NetworkModuleGLora(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+
+ if hasattr(self.sd_module, 'weight'):
+ self.shape = self.sd_module.weight.shape
+
+ self.w1a = weights.w["a1.weight"]
+ self.w1b = weights.w["b1.weight"]
+ self.w2a = weights.w["a2.weight"]
+ self.w2b = weights.w["b2.weight"]
+
+ def calc_updown(self, target): # pylint: disable=arguments-differ
+ w1a = self.w1a.to(target.device, dtype=target.dtype)
+ w1b = self.w1b.to(target.device, dtype=target.dtype)
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ output_shape = [w1a.size(0), w1b.size(1)]
+ updown = (w2b @ w1b) + ((target @ w2a) @ w1a)
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_hada.py b/modules/lora/network_hada.py
new file mode 100644
index 000000000..6fc142b3b
--- /dev/null
+++ b/modules/lora/network_hada.py
@@ -0,0 +1,46 @@
+import modules.lora.lyco_helpers as lyco_helpers
+import modules.lora.network as network
+
+
+class ModuleTypeHada(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b"]):
+ return NetworkModuleHada(net, weights)
+ return None
+
+
+class NetworkModuleHada(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ if hasattr(self.sd_module, 'weight'):
+ self.shape = self.sd_module.weight.shape
+ self.w1a = weights.w["hada_w1_a"]
+ self.w1b = weights.w["hada_w1_b"]
+ self.dim = self.w1b.shape[0]
+ self.w2a = weights.w["hada_w2_a"]
+ self.w2b = weights.w["hada_w2_b"]
+ self.t1 = weights.w.get("hada_t1")
+ self.t2 = weights.w.get("hada_t2")
+
+ def calc_updown(self, target):
+ w1a = self.w1a.to(target.device, dtype=target.dtype)
+ w1b = self.w1b.to(target.device, dtype=target.dtype)
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ output_shape = [w1a.size(0), w1b.size(1)]
+ if self.t1 is not None:
+ output_shape = [w1a.size(1), w1b.size(1)]
+ t1 = self.t1.to(target.device, dtype=target.dtype)
+ updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
+ output_shape += t1.shape[2:]
+ else:
+ if len(w1b.shape) == 4:
+ output_shape += w1b.shape[2:]
+ updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)
+ if self.t2 is not None:
+ t2 = self.t2.to(target.device, dtype=target.dtype)
+ updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+ else:
+ updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)
+ updown = updown1 * updown2
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_ia3.py b/modules/lora/network_ia3.py
new file mode 100644
index 000000000..479e42526
--- /dev/null
+++ b/modules/lora/network_ia3.py
@@ -0,0 +1,24 @@
+import modules.lora.network as network
+
+class ModuleTypeIa3(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["weight"]):
+ return NetworkModuleIa3(net, weights)
+ return None
+
+
+class NetworkModuleIa3(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.w = weights.w["weight"]
+ self.on_input = weights.w["on_input"].item()
+
+ def calc_updown(self, target):
+ w = self.w.to(target.device, dtype=target.dtype)
+ output_shape = [w.size(0), target.size(1)]
+ if self.on_input:
+ output_shape.reverse()
+ else:
+ w = w.reshape(-1, 1)
+ updown = target * w
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_lokr.py b/modules/lora/network_lokr.py
new file mode 100644
index 000000000..877d4005b
--- /dev/null
+++ b/modules/lora/network_lokr.py
@@ -0,0 +1,57 @@
+import torch
+import modules.lora.lyco_helpers as lyco_helpers
+import modules.lora.network as network
+
+
+class ModuleTypeLokr(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ has_1 = "lokr_w1" in weights.w or ("lokr_w1_a" in weights.w and "lokr_w1_b" in weights.w)
+ has_2 = "lokr_w2" in weights.w or ("lokr_w2_a" in weights.w and "lokr_w2_b" in weights.w)
+ if has_1 and has_2:
+ return NetworkModuleLokr(net, weights)
+ return None
+
+
+def make_kron(orig_shape, w1, w2):
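+    # LoKr update: the full weight delta is the Kronecker product of the two factors, reshaped to the target shape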
+ if len(w2.shape) == 4:
+ w1 = w1.unsqueeze(2).unsqueeze(2)
+ w2 = w2.contiguous()
+ return torch.kron(w1, w2).reshape(orig_shape)
+
+
+class NetworkModuleLokr(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.w1 = weights.w.get("lokr_w1")
+ self.w1a = weights.w.get("lokr_w1_a")
+ self.w1b = weights.w.get("lokr_w1_b")
+ self.dim = self.w1b.shape[0] if self.w1b is not None else self.dim
+ self.w2 = weights.w.get("lokr_w2")
+ self.w2a = weights.w.get("lokr_w2_a")
+ self.w2b = weights.w.get("lokr_w2_b")
+ self.dim = self.w2b.shape[0] if self.w2b is not None else self.dim
+ self.t2 = weights.w.get("lokr_t2")
+
+ def calc_updown(self, target):
+ if self.w1 is not None:
+ w1 = self.w1.to(target.device, dtype=target.dtype)
+ else:
+ w1a = self.w1a.to(target.device, dtype=target.dtype)
+ w1b = self.w1b.to(target.device, dtype=target.dtype)
+ w1 = w1a @ w1b
+ if self.w2 is not None:
+ w2 = self.w2.to(target.device, dtype=target.dtype)
+ elif self.t2 is None:
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ w2 = w2a @ w2b
+ else:
+ t2 = self.t2.to(target.device, dtype=target.dtype)
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+ output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
+ if len(target.shape) == 4:
+ output_shape = target.shape
+ updown = make_kron(output_shape, w1, w2)
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_lora.py b/modules/lora/network_lora.py
new file mode 100644
index 000000000..6c1d7ea3f
--- /dev/null
+++ b/modules/lora/network_lora.py
@@ -0,0 +1,78 @@
+import torch
+import diffusers.models.lora as diffusers_lora
+import modules.lora.lyco_helpers as lyco_helpers
+import modules.lora.network as network
+from modules import devices
+
+
+class ModuleTypeLora(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["lora_up.weight", "lora_down.weight"]):
+ return NetworkModuleLora(net, weights)
+ return None
+
+
+class NetworkModuleLora(network.NetworkModule):
+
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.up_model = self.create_module(weights.w, "lora_up.weight")
+ self.down_model = self.create_module(weights.w, "lora_down.weight")
+ self.mid_model = self.create_module(weights.w, "lora_mid.weight", none_ok=True)
+ self.dim = weights.w["lora_down.weight"].shape[0]
+
+ def create_module(self, weights, key, none_ok=False):
+ from modules.shared import opts
+ weight = weights.get(key)
+ if weight is None and none_ok:
+ return None
+ linear_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, diffusers_lora.LoRACompatibleLinear]
+ is_linear = type(self.sd_module) in linear_modules or self.sd_module.__class__.__name__ in {"NNCFLinear", "QLinear", "Linear4bit"}
+ is_conv = type(self.sd_module) in [torch.nn.Conv2d, diffusers_lora.LoRACompatibleConv] or self.sd_module.__class__.__name__ in {"NNCFConv2d", "QConv2d"}
+ if is_linear:
+ weight = weight.reshape(weight.shape[0], -1)
+ module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
+ elif is_conv and key == "lora_down.weight" or key == "dyn_up":
+ if len(weight.shape) == 2:
+ weight = weight.reshape(weight.shape[0], -1, 1, 1)
+ if weight.shape[2] != 1 or weight.shape[3] != 1:
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+ else:
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+ elif is_conv and key == "lora_mid.weight":
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+ elif is_conv and key == "lora_up.weight" or key == "dyn_down":
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+ else:
+ raise AssertionError(f'Lora unsupported: layer={self.network_key} type={type(self.sd_module).__name__}')
+ with torch.no_grad():
+ if weight.shape != module.weight.shape:
+ weight = weight.reshape(module.weight.shape)
+ module.weight.copy_(weight)
+ if opts.lora_load_gpu:
+ module = module.to(device=devices.device, dtype=devices.dtype)
+ module.weight.requires_grad_(False)
+ return module
+
+ def calc_updown(self, target): # pylint: disable=W0237
+ target_dtype = target.dtype if target.dtype != torch.uint8 else self.up_model.weight.dtype
+ up = self.up_model.weight.to(target.device, dtype=target_dtype)
+ down = self.down_model.weight.to(target.device, dtype=target_dtype)
+ output_shape = [up.size(0), down.size(1)]
+ if self.mid_model is not None:
+ # cp-decomposition
+ mid = self.mid_model.weight.to(target.device, dtype=target_dtype)
+ updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid)
+ output_shape += mid.shape[2:]
+ else:
+ if len(down.shape) == 4:
+ output_shape += down.shape[2:]
+ updown = lyco_helpers.rebuild_conventional(up, down, output_shape, self.network.dyn_dim)
+ return self.finalize_updown(updown, target, output_shape)
+
+ def forward(self, x, y):
+ self.up_model.to(device=devices.device)
+ self.down_model.to(device=devices.device)
+ if hasattr(y, "scale"):
+ return y(scale=1) + self.up_model(self.down_model(x)) * self.multiplier() * self.calc_scale()
+ return y + self.up_model(self.down_model(x)) * self.multiplier() * self.calc_scale()
diff --git a/modules/lora/network_norm.py b/modules/lora/network_norm.py
new file mode 100644
index 000000000..e8f1740e3
--- /dev/null
+++ b/modules/lora/network_norm.py
@@ -0,0 +1,23 @@
+import modules.lora.network as network
+
+class ModuleTypeNorm(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["w_norm", "b_norm"]):
+ return NetworkModuleNorm(net, weights)
+ return None
+
+
+class NetworkModuleNorm(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.w_norm = weights.w.get("w_norm")
+ self.b_norm = weights.w.get("b_norm")
+
+ def calc_updown(self, target):
+ output_shape = self.w_norm.shape
+ updown = self.w_norm.to(target.device, dtype=target.dtype)
+ if self.b_norm is not None:
+ ex_bias = self.b_norm.to(target.device, dtype=target.dtype)
+ else:
+ ex_bias = None
+ return self.finalize_updown(updown, target, output_shape, ex_bias)
diff --git a/modules/lora/network_oft.py b/modules/lora/network_oft.py
new file mode 100644
index 000000000..808286066
--- /dev/null
+++ b/modules/lora/network_oft.py
@@ -0,0 +1,81 @@
+import torch
+import modules.lora.network as network
+from modules.lora.lyco_helpers import factorization
+from einops import rearrange
+
+
+class ModuleTypeOFT(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["oft_blocks"]) or all(x in weights.w for x in ["oft_diag"]):
+ return NetworkModuleOFT(net, weights)
+ return None
+
+# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
+# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
+class NetworkModuleOFT(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.lin_module = None
+        self.org_module: list[torch.nn.Module] = [self.sd_module]
+ self.scale = 1.0
+
+ # kohya-ss
+ if "oft_blocks" in weights.w.keys():
+ self.is_kohya = True
+ self.oft_blocks = weights.w["oft_blocks"] # (num_blocks, block_size, block_size)
+ self.alpha = weights.w["alpha"] # alpha is constraint
+ self.dim = self.oft_blocks.shape[0] # lora dim
+ # LyCORIS
+ elif "oft_diag" in weights.w.keys():
+ self.is_kohya = False
+ self.oft_blocks = weights.w["oft_diag"]
+ # self.alpha is unused
+ self.dim = self.oft_blocks.shape[1] # (num_blocks, block_size, block_size)
+
+ is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear]
+ is_conv = type(self.sd_module) in [torch.nn.Conv2d]
+ is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] # unsupported
+
+ if is_linear:
+ self.out_dim = self.sd_module.out_features
+ elif is_conv:
+ self.out_dim = self.sd_module.out_channels
+ elif is_other_linear:
+ self.out_dim = self.sd_module.embed_dim
+
+ if self.is_kohya:
+ self.constraint = self.alpha * self.out_dim
+ self.num_blocks = self.dim
+ self.block_size = self.out_dim // self.dim
+ else:
+ self.constraint = None
+ self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
+
+ def calc_updown(self, target):
+ oft_blocks = self.oft_blocks.to(target.device, dtype=target.dtype)
+ eye = torch.eye(self.block_size, device=target.device)
+
+        if self.is_kohya:
+            constraint = self.constraint.to(target.device)  # constraint only exists for the kohya-ss variant
+ block_Q = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
+ norm_Q = torch.norm(block_Q.flatten()).to(target.device)
+ new_norm_Q = torch.clamp(norm_Q, max=constraint)
+ block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))
+ mat1 = eye + block_Q
+ mat2 = (eye - block_Q).float().inverse()
+ oft_blocks = torch.matmul(mat1, mat2)
+
+ R = oft_blocks.to(target.device, dtype=target.dtype)
+
+ # This errors out for MultiheadAttention, might need to be handled up-stream
+ merged_weight = rearrange(target, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
+ merged_weight = torch.einsum(
+ 'k n m, k n ... -> k m ...',
+ R,
+ merged_weight
+ )
+ merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...')
+
+ updown = merged_weight.to(target.device, dtype=target.dtype) - target
+ output_shape = target.shape
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_overrides.py b/modules/lora/network_overrides.py
new file mode 100644
index 000000000..5334f3c1b
--- /dev/null
+++ b/modules/lora/network_overrides.py
@@ -0,0 +1,49 @@
+from modules import shared
+
+
+maybe_diffusers = [ # forced if lora_maybe_diffusers is enabled
+ 'aaebf6360f7d', # sd15-lcm
+ '3d18b05e4f56', # sdxl-lcm
+ 'b71dcb732467', # sdxl-tcd
+ '813ea5fb1c67', # sdxl-turbo
+ # not really needed, but just in case
+ '5a48ac366664', # hyper-sd15-1step
+ 'ee0ff23dcc42', # hyper-sd15-2step
+ 'e476eb1da5df', # hyper-sd15-4step
+ 'ecb844c3f3b0', # hyper-sd15-8step
+ '1ab289133ebb', # hyper-sd15-8step-cfg
+ '4f494295edb1', # hyper-sdxl-8step
+ 'ca14a8c621f8', # hyper-sdxl-8step-cfg
+ '1c88f7295856', # hyper-sdxl-4step
+ 'fdd5dcd1d88a', # hyper-sdxl-2step
+ '8cca3706050b', # hyper-sdxl-1step
+]
+
+force_diffusers = [ # forced always
+ '816d0eed49fd', # flash-sdxl
+ 'c2ec22757b46', # flash-sd15
+]
+
+force_models = [ # forced always
+ 'sc',
+ # 'sd3',
+ 'kandinsky',
+ 'hunyuandit',
+ 'auraflow',
+]
+
+force_classes = [ # forced always
+]
+
+
+def check_override(shorthash=''):
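+    # decide whether to force the diffusers LoRA loader instead of the native one,
+    # based on model type/class and known LoRA shorthashes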
+ force = False
+ force = force or (shared.sd_model_type in force_models)
+ force = force or (shared.sd_model.__class__.__name__ in force_classes)
+ if len(shorthash) < 4:
+ return force
+ force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False)
+ force = force or any(x.startswith(shorthash) for x in force_diffusers)
+ if force and shared.opts.lora_maybe_diffusers:
+ shared.log.debug('LoRA override: force diffusers')
+ return force
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
new file mode 100644
index 000000000..762705b67
--- /dev/null
+++ b/modules/lora/networks.py
@@ -0,0 +1,453 @@
+from typing import Union, List
+import os
+import re
+import time
+import concurrent
+import modules.lora.network as network
+import modules.lora.network_lora as network_lora
+import modules.lora.network_hada as network_hada
+import modules.lora.network_ia3 as network_ia3
+import modules.lora.network_oft as network_oft
+import modules.lora.network_lokr as network_lokr
+import modules.lora.network_full as network_full
+import modules.lora.network_norm as network_norm
+import modules.lora.network_glora as network_glora
+import modules.lora.network_overrides as network_overrides
+import modules.lora.lora_convert as lora_convert
+import torch
+import diffusers.models.lora
+from modules import shared, devices, sd_models, sd_models_compile, errors, scripts, files_cache, model_quant
+
+
+debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+extra_network_lora = None
+available_networks = {}
+available_network_aliases = {}
+loaded_networks: List[network.Network] = []
+timer = { 'load': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+lora_cache = {}
+diffuser_loaded = []
+diffuser_scales = []
+available_network_hash_lookup = {}
+forbidden_network_aliases = {}
+re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
+module_types = [
+ network_lora.ModuleTypeLora(),
+ network_hada.ModuleTypeHada(),
+ network_ia3.ModuleTypeIa3(),
+ network_oft.ModuleTypeOFT(),
+ network_lokr.ModuleTypeLokr(),
+ network_full.ModuleTypeFull(),
+ network_norm.ModuleTypeNorm(),
+ network_glora.ModuleTypeGLora(),
+]
+
+
+def assign_network_names_to_compvis_modules(sd_model):
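+    # build shared.sd_model.network_layer_mapping: maps lora_te*/lora_unet/lora_transformer style names to live modules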
+ if sd_model is None:
+ return
+    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model)  # wrapped model compatibility
+ network_layer_mapping = {}
+ if hasattr(sd_model, 'text_encoder') and sd_model.text_encoder is not None:
+ for name, module in sd_model.text_encoder.named_modules():
+ prefix = "lora_te1_" if hasattr(sd_model, 'text_encoder_2') else "lora_te_"
+ network_name = prefix + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'text_encoder_2'):
+ for name, module in sd_model.text_encoder_2.named_modules():
+ network_name = "lora_te2_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'unet'):
+ for name, module in sd_model.unet.named_modules():
+ network_name = "lora_unet_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'transformer'):
+ for name, module in sd_model.transformer.named_modules():
+ network_name = "lora_transformer_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ if "norm" in network_name and "linear" not in network_name and shared.sd_model_type != "sd3":
+ continue
+ module.network_layer_name = network_name
+ shared.sd_model.network_layer_mapping = network_layer_mapping
+
+
+def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> network.Network | None:
+ name = name.replace(".", "_")
+ shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" detected={network_on_disk.sd_version} method=diffusers scale={lora_scale} fuse={shared.opts.lora_fuse_diffusers}')
+ if not shared.native:
+ return None
+ if not hasattr(shared.sd_model, 'load_lora_weights'):
+ shared.log.error(f'Load network: type=LoRA class={shared.sd_model.__class__} does not implement load lora')
+ return None
+ try:
+ shared.sd_model.load_lora_weights(network_on_disk.filename, adapter_name=name)
+ except Exception as e:
+ if 'already in use' in str(e):
+ pass
+ else:
+ if 'The following keys have not been correctly renamed' in str(e):
+ shared.log.error(f'Load network: type=LoRA name="{name}" diffusers unsupported format')
+ else:
+ shared.log.error(f'Load network: type=LoRA name="{name}" {e}')
+ if debug:
+ errors.display(e, "LoRA")
+ return None
+ if name not in diffuser_loaded:
+ diffuser_loaded.append(name)
+ diffuser_scales.append(lora_scale)
+ net = network.Network(name, network_on_disk)
+ net.mtime = os.path.getmtime(network_on_disk.filename)
+ return net
+
+
+def load_network(name, network_on_disk) -> network.Network | None:
+ if not shared.sd_loaded:
+ return None
+
+ cached = lora_cache.get(name, None)
+ if debug:
+        shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" {"cached" if cached else ""}')
+ if cached is not None:
+ return cached
+ net = network.Network(name, network_on_disk)
+ net.mtime = os.path.getmtime(network_on_disk.filename)
+ sd = sd_models.read_state_dict(network_on_disk.filename, what='network')
+ if shared.sd_model_type == 'f1': # if kohya flux lora, convert state_dict
+ sd = lora_convert._convert_kohya_flux_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
+    if shared.sd_model_type == 'sd3':  # if kohya sd3 lora, convert state_dict
+ try:
+ sd = lora_convert._convert_kohya_sd3_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
+ except ValueError: # EAFP for diffusers PEFT keys
+ pass
+ assign_network_names_to_compvis_modules(shared.sd_model)
+ keys_failed_to_match = {}
+ matched_networks = {}
+ bundle_embeddings = {}
+ convert = lora_convert.KeyConvert()
+ for key_network, weight in sd.items():
+ parts = key_network.split('.')
+ if parts[0] == "bundle_emb":
+ emb_name, vec_name = parts[1], key_network.split(".", 2)[-1]
+ emb_dict = bundle_embeddings.get(emb_name, {})
+ emb_dict[vec_name] = weight
+ bundle_embeddings[emb_name] = emb_dict
+ continue
+ if len(parts) > 5: # messy handler for diffusers peft lora
+ key_network_without_network_parts = '_'.join(parts[:-2])
+ if not key_network_without_network_parts.startswith('lora_'):
+ key_network_without_network_parts = 'lora_' + key_network_without_network_parts
+ network_part = '.'.join(parts[-2:]).replace('lora_A', 'lora_down').replace('lora_B', 'lora_up')
+ else:
+ key_network_without_network_parts, network_part = key_network.split(".", 1)
+ key, sd_module = convert(key_network_without_network_parts)
+ if sd_module is None:
+ keys_failed_to_match[key_network] = key
+ continue
+ if key not in matched_networks:
+ matched_networks[key] = network.NetworkWeights(network_key=key_network, sd_key=key, w={}, sd_module=sd_module)
+ matched_networks[key].w[network_part] = weight
+ network_types = []
+ for key, weights in matched_networks.items():
+ net_module = None
+ for nettype in module_types:
+ net_module = nettype.create_module(net, weights)
+ if net_module is not None:
+ network_types.append(nettype.__class__.__name__)
+ break
+ if net_module is None:
+ shared.log.error(f'LoRA unhandled: name={name} key={key} weights={weights.w.keys()}')
+ else:
+ net.modules[key] = net_module
+ if len(keys_failed_to_match) > 0:
+ shared.log.warning(f'LoRA name="{name}" type={set(network_types)} unmatched={len(keys_failed_to_match)} matched={len(matched_networks)}')
+ if debug:
+ shared.log.debug(f'LoRA name="{name}" unmatched={keys_failed_to_match}')
+ else:
+ shared.log.debug(f'LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)}')
+ if len(matched_networks) == 0:
+ return None
+ lora_cache[name] = net
+ net.bundle_embeddings = bundle_embeddings
+ return net
+
+def maybe_recompile_model(names, te_multipliers):
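+    # when the model is torch-compiled, compare the requested LoRA set against the one recorded at
+    # compile time; if it differs, reload clean weights with compilation temporarily disabled so the
+    # new networks can be applied, and report back so the caller can recompile once they are loaded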
+ recompile_model = False
+ if shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled:
+ if len(names) == len(shared.compiled_model_state.lora_model):
+ for i, name in enumerate(names):
+                if shared.compiled_model_state.lora_model[i] != f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}":
+ recompile_model = True
+ shared.compiled_model_state.lora_model = []
+ break
+ if not recompile_model:
+ if len(loaded_networks) > 0 and debug:
+                    shared.log.debug('Model Compile: Skipping LoRA loading')
+ return
+ else:
+ recompile_model = True
+ shared.compiled_model_state.lora_model = []
+ if recompile_model:
+ backup_cuda_compile = shared.opts.cuda_compile
+ sd_models.unload_model_weights(op='model')
+ shared.opts.cuda_compile = []
+ sd_models.reload_model_weights(op='model')
+ shared.opts.cuda_compile = backup_cuda_compile
+ return recompile_model
+
+
+def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
+ if any(x is None for x in networks_on_disk):
+ list_available_networks()
+ networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
+ failed_to_load_networks = []
+ recompile_model = maybe_recompile_model(names, te_multipliers)
+
+ loaded_networks.clear()
+ diffuser_loaded.clear()
+ diffuser_scales.clear()
+ timer['load'] = 0
+ t0 = time.time()
+
+ for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)):
+ net = None
+ if network_on_disk is not None:
+ shorthash = getattr(network_on_disk, 'shorthash', '').lower()
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" hash="{shorthash}"')
+ try:
+ if recompile_model:
+ shared.compiled_model_state.lora_model.append(f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}")
+                if shared.opts.lora_force_diffusers or network_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRA loading
+ net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier)
+ else:
+ net = load_network(name, network_on_disk)
+ if net is not None:
+ net.mentioned_name = name
+ network_on_disk.read_hash()
+ except Exception as e:
+ shared.log.error(f'Load network: type=LoRA file="{network_on_disk.filename}" {e}')
+ if debug:
+ errors.display(e, 'LoRA')
+ continue
+ if net is None:
+ failed_to_load_networks.append(name)
+ shared.log.error(f'Load network: type=LoRA name="{name}" detected={network_on_disk.sd_version if network_on_disk is not None else None} failed')
+ continue
+ shared.sd_model.embedding_db.load_diffusers_embedding(None, net.bundle_embeddings)
+ net.te_multiplier = te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier
+ net.unet_multiplier = unet_multipliers[i] if unet_multipliers else shared.opts.extra_networks_default_multiplier
+ net.dyn_dim = dyn_dims[i] if dyn_dims else shared.opts.extra_networks_default_multiplier
+ loaded_networks.append(net)
+
+ while len(lora_cache) > shared.opts.lora_in_memory_limit:
+ name = next(iter(lora_cache))
+ lora_cache.pop(name, None)
+
+ if len(diffuser_loaded) > 0:
+ shared.log.debug(f'Load network: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
+ try:
+ shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
+ if shared.opts.lora_fuse_diffusers:
+ shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # fuse uses fixed scale since later apply does the scaling
+ shared.sd_model.unload_lora_weights()
+ except Exception as e:
+ shared.log.error(f'Load network: type=LoRA {e}')
+ if debug:
+ errors.display(e, 'LoRA')
+
+ if len(loaded_networks) > 0 and debug:
+ shared.log.debug(f'Load network: type=LoRA loaded={len(loaded_networks)} cache={list(lora_cache)}')
+
+ devices.torch_gc()
+
+ if recompile_model:
+ shared.log.info("Load network: type=LoRA recompiling model")
+ backup_lora_model = shared.compiled_model_state.lora_model
+ if 'Model' in shared.opts.cuda_compile:
+ shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
+
+ shared.compiled_model_state.lora_model = backup_lora_model
+ if shared.opts.diffusers_offload_mode == "balanced":
+ sd_models.apply_balanced_offload(shared.sd_model)
+ t1 = time.time()
+ timer['load'] += t1 - t0
+
+def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+ weights_backup = getattr(self, "network_weights_backup", None)
+ bias_backup = getattr(self, "network_bias_backup", None)
+ if weights_backup is None and bias_backup is None:
+ return
+ device = self.weight.device
+ with devices.inference_context():
+ if weights_backup is not None:
+ if updown is not None:
+ if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9:
+ # inpainting model. zero pad updown to make channel[1] 4 to 9
+ updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
+ weights_backup = weights_backup.clone().to(device)
+ weights_backup += updown.to(weights_backup)
+ if getattr(self, "quant_type", None) in ['nf4', 'fp4']:
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is not None:
+ self.weight = bnb.nn.Params4bit(weights_backup, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
+ else:
+ self.weight.copy_(weights_backup, non_blocking=True)
+ else:
+ self.weight.copy_(weights_backup, non_blocking=True)
+ if hasattr(self, "qweight") and hasattr(self, "freeze"):
+ self.freeze()
+ if bias_backup is not None:
+ if ex_bias is not None:
+ bias_backup = bias_backup.clone() + ex_bias.to(weights_backup)
+ self.bias.copy_(bias_backup)
+ else:
+ self.bias = None
+ self.to(device)
+
+
+def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
+ weights_backup = getattr(self, "network_weights_backup", None)
+ if weights_backup is None and wanted_names != (): # pylint: disable=C1803
+ if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
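+            # for bitsandbytes 4-bit layers the backup is stored dequantized so LoRA deltas can be added in full precision and re-quantized by set_weights()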
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is not None:
+ with devices.inference_context():
+ weights_backup = bnb.functional.dequantize_4bit(self.weight, quant_state=self.weight.quant_state, quant_type=self.weight.quant_type, blocksize=self.weight.blocksize,)
+ self.quant_state = self.weight.quant_state
+ self.quant_type = self.weight.quant_type
+ self.blocksize = self.weight.blocksize
+ else:
+ weights_backup = self.weight.clone()
+ else:
+ weights_backup = self.weight.clone()
+ if shared.opts.lora_offload_backup and weights_backup is not None:
+ weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weights_backup
+ bias_backup = getattr(self, "network_bias_backup", None)
+ if bias_backup is None:
+ if getattr(self, 'bias', None) is not None:
+ bias_backup = self.bias.clone()
+ else:
+ bias_backup = None
+ if shared.opts.lora_offload_backup and bias_backup is not None:
+ bias_backup = bias_backup.to(devices.cpu)
+ self.network_bias_backup = bias_backup
+
+
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv]):
+ """
+ Applies the currently selected set of networks to the weights of torch layer self.
+ If weights already have this particular set of networks applied, does nothing.
+    If not, restores original weights from backup and alters weights according to networks.
+ """
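+    # rough sketch of the mechanics below, assuming a plain LoRA module: calc_updown() returns a delta
+    # roughly equal to scale * (lora_up @ lora_down) shaped like the layer weight, and set_weights()
+    # adds that delta onto the backed-up original weight (or restores the backup when nothing is loaded)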
+ network_layer_name = getattr(self, 'network_layer_name', None)
+ if network_layer_name is None:
+ return
+ t0 = time.time()
+ current_names = getattr(self, "network_current_names", ())
+ wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
+ if any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
+ maybe_backup_weights(self, wanted_names)
+ if current_names != wanted_names:
+ for net in loaded_networks:
+ # default workflow where module is known and has weights
+ module = net.modules.get(network_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ try:
+ with devices.inference_context():
+ weight = self.weight # calculate quant weights once
+ updown, ex_bias = module.calc_updown(weight)
+ set_weights(self, updown, ex_bias)
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
+ if module is None:
+ continue
+ shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if not loaded_networks: # restore from backup
+ set_weights(self, None, None)
+ self.network_current_names = wanted_names
+ t1 = time.time()
+ timer['apply'] += t1 - t0
+
+def network_load():
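+    # walk all known pipeline components and apply (or restore) LoRA weights module-by-module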
+    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+    for component_name in ['text_encoder', 'text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None:
+ for _, module in component.named_modules():
+ network_apply_weights(module)
+
+
+def list_available_networks():
+ t0 = time.time()
+ available_networks.clear()
+ available_network_aliases.clear()
+ forbidden_network_aliases.clear()
+ available_network_hash_lookup.clear()
+ forbidden_network_aliases.update({"none": 1, "Addams": 1})
+ if not os.path.exists(shared.cmd_opts.lora_dir):
+ shared.log.warning(f'LoRA directory not found: path="{shared.cmd_opts.lora_dir}"')
+
+ def add_network(filename):
+ if not os.path.isfile(filename):
+ return
+ name = os.path.splitext(os.path.basename(filename))[0]
+ name = name.replace('.', '_')
+ try:
+ entry = network.NetworkOnDisk(name, filename)
+ available_networks[entry.name] = entry
+ if entry.alias in available_network_aliases:
+ forbidden_network_aliases[entry.alias.lower()] = 1
+ if shared.opts.lora_preferred_name == 'filename':
+ available_network_aliases[entry.name] = entry
+ else:
+ available_network_aliases[entry.alias] = entry
+ if entry.shorthash:
+ available_network_hash_lookup[entry.shorthash] = entry
+ except OSError as e: # should catch FileNotFoundError and PermissionError etc.
+ shared.log.error(f'LoRA: filename="{filename}" {e}')
+
+ candidates = list(files_cache.list_files(shared.cmd_opts.lora_dir, ext_filter=[".pt", ".ckpt", ".safetensors"]))
+ with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
+ for fn in candidates:
+ executor.submit(add_network, fn)
+ t1 = time.time()
+ shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
+
+
+def infotext_pasted(infotext, params): # pylint: disable=W0613
+ if "AddNet Module 1" in [x[1] for x in scripts.scripts_txt2img.infotext_fields]:
+ return # if the other extension is active, it will handle those fields, no need to do anything
+ added = []
+ for k in params:
+ if not k.startswith("AddNet Model "):
+ continue
+ num = k[13:]
+ if params.get("AddNet Module " + num) != "LoRA":
+ continue
+ name = params.get("AddNet Model " + num)
+ if name is None:
+ continue
+ m = re_network_name.match(name)
+ if m:
+ name = m.group(1)
+ multiplier = params.get("AddNet Weight A " + num, "1.0")
+        added.append(f"<lora:{name}:{multiplier}>")
+ if added:
+ params["Prompt"] += "\n" + "".join(added)
+
+
+list_available_networks()
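+# minimal usage sketch (hypothetical caller, not part of this module):
+#   list_available_networks()                                             # scan lora_dir and build the registries
+#   load_networks(['my_lora'], te_multipliers=[0.8], unet_multipliers=[0.8])
+#   network_load()                                                        # apply weights to all pipeline components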
diff --git a/modules/lora/ui_extra_networks_lora.py b/modules/lora/ui_extra_networks_lora.py
new file mode 100644
index 000000000..73cce47a3
--- /dev/null
+++ b/modules/lora/ui_extra_networks_lora.py
@@ -0,0 +1,123 @@
+import os
+import json
+import concurrent
+import modules.lora.networks as networks
+from modules import shared, ui_extra_networks
+
+
+debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+
+
+class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
+ def __init__(self):
+ super().__init__('Lora')
+ self.list_time = 0
+
+ def refresh(self):
+ networks.list_available_networks()
+
+ @staticmethod
+ def get_tags(l, info):
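+        # merge tag frequencies from the embedded model metadata with trigger words and tags from the model info json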
+ tags = {}
+ try:
+ if l.metadata is not None:
+ modelspec_tags = l.metadata.get('modelspec.tags', {})
+                possible_tags = l.metadata.get('ss_tag_frequency', {}) # tags from model metadata
+ if isinstance(possible_tags, str):
+ possible_tags = {}
+ if isinstance(modelspec_tags, str):
+ modelspec_tags = {}
+ if len(list(modelspec_tags)) > 0:
+ possible_tags.update(modelspec_tags)
+ for k, v in possible_tags.items():
+ words = k.split('_', 1) if '_' in k else [v, k]
+ words = [str(w).replace('.json', '') for w in words]
+ if words[0] == '{}':
+ words[0] = 0
+ tag = ' '.join(words[1:]).lower()
+ tags[tag] = words[0]
+
+ def find_version():
+ found_versions = []
+ current_hash = l.hash[:8].upper()
+ all_versions = info.get('modelVersions', [])
+                for v in all_versions:
+ for f in v.get('files', []):
+ if any(h.startswith(current_hash) for h in f.get('hashes', {}).values()):
+ found_versions.append(v)
+ if len(found_versions) == 0:
+ found_versions = all_versions
+ return found_versions
+
+ for v in find_version(): # trigger words from info json
+ possible_tags = v.get('trainedWords', [])
+ if isinstance(possible_tags, list):
+ for tag_str in possible_tags:
+ for tag in tag_str.split(','):
+ tag = tag.strip().lower()
+ if tag not in tags:
+ tags[tag] = 0
+
+ possible_tags = info.get('tags', []) # tags from info json
+ if not isinstance(possible_tags, list):
+ possible_tags = list(possible_tags.values())
+ for tag in possible_tags:
+ tag = tag.strip().lower()
+ if tag not in tags:
+ tags[tag] = 0
+ except Exception:
+ pass
+ bad_chars = [';', ':', '<', ">", "*", '?', '\'', '\"', '(', ')', '[', ']', '{', '}', '\\', '/']
+ clean_tags = {}
+ for k, v in tags.items():
+ tag = ''.join(i for i in k if i not in bad_chars).strip()
+ clean_tags[tag] = v
+
+ clean_tags.pop('img', None)
+ clean_tags.pop('dataset', None)
+ return clean_tags
+
+ def create_item(self, name):
+ l = networks.available_networks.get(name)
+ if l is None:
+ shared.log.warning(f'Networks: type=lora registered={len(list(networks.available_networks))} file="{name}" not registered')
+ return None
+ try:
+ # path, _ext = os.path.splitext(l.filename)
+ name = os.path.splitext(os.path.relpath(l.filename, shared.cmd_opts.lora_dir))[0]
+ item = {
+ "type": 'Lora',
+ "name": name,
+ "filename": l.filename,
+ "hash": l.shorthash,
+                "prompt": json.dumps(f" <lora:{name}:{shared.opts.extra_networks_default_multiplier}>"),
+ "metadata": json.dumps(l.metadata, indent=4) if l.metadata else None,
+ "mtime": os.path.getmtime(l.filename),
+ "size": os.path.getsize(l.filename),
+ "version": l.sd_version,
+ }
+ info = self.find_info(l.filename)
+ item["info"] = info
+ item["description"] = self.find_description(l.filename, info) # use existing info instead of double-read
+ item["tags"] = self.get_tags(l, info)
+ return item
+ except Exception as e:
+ shared.log.error(f'Networks: type=lora file="{name}" {e}')
+ if debug:
+ from modules import errors
+ errors.display(e, 'Lora')
+ return None
+
+ def list_items(self):
+ items = []
+ with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
+ future_items = {executor.submit(self.create_item, net): net for net in networks.available_networks}
+ for future in concurrent.futures.as_completed(future_items):
+ item = future.result()
+ if item is not None:
+ items.append(item)
+ self.update_all_previews(items)
+ return items
+
+ def allowed_directories_for_previews(self):
+ return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir]
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 2164134b1..83d3b1b69 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -8,6 +8,8 @@
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
+from modules.lora.networks import network_load
+from modules.lora.networks import timer as network_timer
debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -424,6 +426,9 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
p.prompts = p.all_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
if p.negative_prompts is None or len(p.negative_prompts) == 0:
p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
+ network_timer['apply'] = 0
+ network_timer['restore'] = 0
+ network_load()
sd_models.move_model(shared.sd_model, devices.device)
sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
diff --git a/modules/shared.py b/modules/shared.py
index a89cbbc95..a5af83f5e 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -908,6 +908,7 @@ def get_default_modes():
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
"lora_load_gpu": OptionInfo(True if not cmd_opts.lowvram else False, "Load LoRA directly to GPU"),
+ "lora_offload_backup": OptionInfo(True, "Offload LoRA Backup Weights"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/scripts/lora_script.py b/scripts/lora_script.py
new file mode 100644
index 000000000..a153a2caa
--- /dev/null
+++ b/scripts/lora_script.py
@@ -0,0 +1,62 @@
+import re
+import modules.lora.networks as networks
+from modules.lora.lora_extract import create_ui
+from modules.lora.network import NetworkOnDisk
+from modules.lora.ui_extra_networks_lora import ExtraNetworksPageLora
+from modules.lora.extra_networks_lora import ExtraNetworkLora
+from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models, shared # pylint: disable=unused-import
+
+
+re_lora = re.compile("<lora:([^:]+):")
Date: Sun, 24 Nov 2024 13:02:45 -0500
Subject: [PATCH 008/162] correct lora assignment
Signed-off-by: Vladimir Mandic
---
extensions-builtin/Lora/networks.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index db617ee5b..fd6287c62 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -88,7 +88,7 @@ def assign_network_names_to_compvis_modules(sd_model):
network_name = name.replace(".", "_")
network_layer_mapping[network_name] = module
module.network_layer_name = network_name
- shared.sd_model.network_layer_mapping = network_layer_mapping
+ sd_model.network_layer_mapping = network_layer_mapping
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> network.Network:
@@ -141,7 +141,7 @@ def load_network(name, network_on_disk) -> network.Network:
sd = sd_models.read_state_dict(network_on_disk.filename, what='network')
if shared.sd_model_type == 'f1': # if kohya flux lora, convert state_dict
sd = lora_convert._convert_kohya_flux_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
- assign_network_names_to_compvis_modules(shared.sd_model) # this should not be needed but is here as an emergency fix for an unknown error people are experiencing in 1.2.0
+ assign_network_names_to_compvis_modules(shared.sd_model)
keys_failed_to_match = {}
matched_networks = {}
bundle_embeddings = {}
From fdb8cb509ef50722fa6c9cb61c90b831927412fd Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 24 Nov 2024 13:49:27 -0500
Subject: [PATCH 009/162] force move te when using xhinker
Signed-off-by: Vladimir Mandic
---
modules/prompt_parser_diffusers.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 234272907..bfecfade6 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -583,13 +583,13 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl
te1_device, te2_device, te3_device = None, None, None
if hasattr(pipe, "text_encoder") and pipe.text_encoder.device != devices.device:
te1_device = pipe.text_encoder.device
- sd_models.move_model(pipe.text_encoder, devices.device)
+ sd_models.move_model(pipe.text_encoder, devices.device, force=True)
if hasattr(pipe, "text_encoder_2") and pipe.text_encoder_2.device != devices.device:
te2_device = pipe.text_encoder_2.device
- sd_models.move_model(pipe.text_encoder_2, devices.device)
+ sd_models.move_model(pipe.text_encoder_2, devices.device, force=True)
if hasattr(pipe, "text_encoder_3") and pipe.text_encoder_3.device != devices.device:
te3_device = pipe.text_encoder_3.device
- sd_models.move_model(pipe.text_encoder_3, devices.device)
+ sd_models.move_model(pipe.text_encoder_3, devices.device, force=True)
if is_sd3:
prompt_embed, negative_embed, positive_pooled, negative_pooled = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, use_t5_encoder=bool(pipe.text_encoder_3))
@@ -601,10 +601,10 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl
prompt_embed, negative_embed = get_weighted_text_embeddings_sd15(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, clip_skip=clip_skip)
if te1_device is not None:
- sd_models.move_model(pipe.text_encoder, te1_device)
+ sd_models.move_model(pipe.text_encoder, te1_device, force=True)
if te2_device is not None:
- sd_models.move_model(pipe.text_encoder_2, te1_device)
+ sd_models.move_model(pipe.text_encoder_2, te1_device, force=True)
if te3_device is not None:
- sd_models.move_model(pipe.text_encoder_3, te1_device)
+ sd_models.move_model(pipe.text_encoder_3, te1_device, force=True)
return prompt_embed, positive_pooled, negative_embed, negative_pooled
From e4038feff1ad6548def3553fa543e17b0fe5e34a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 24 Nov 2024 15:05:58 -0500
Subject: [PATCH 010/162] minor updates
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 2 +-
modules/prompt_parser_diffusers.py | 4 ++--
modules/sd_models.py | 7 +++++--
modules/sd_samplers_diffusers.py | 14 +++++++-------
wiki | 2 +-
5 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ad77bbbe5..cbd29840d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-23
+## Update for 2024-11-24
- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index bfecfade6..2edef4bf5 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -162,7 +162,7 @@ def extend_embeds(self, batchidx, idx): # Extends scheduled prompt via index
def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
self.attention = shared.opts.prompt_attention
- if self.attention == "xhinker" or 'Flux' in pipe.__class__.__name__:
+ if self.attention == "xhinker":
prompt_embed, positive_pooled, negative_embed, negative_pooled = get_xhinker_text_embeddings(pipe, positive_prompt, negative_prompt, self.clip_skip)
else:
prompt_embed, positive_pooled, negative_embed, negative_pooled = get_weighted_text_embeddings(pipe, positive_prompt, negative_prompt, self.clip_skip)
@@ -591,7 +591,7 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl
te3_device = pipe.text_encoder_3.device
sd_models.move_model(pipe.text_encoder_3, devices.device, force=True)
- if is_sd3:
+ if 'StableDiffusion3' in pipe.__class__.__name__:
prompt_embed, negative_embed, positive_pooled, negative_pooled = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, use_t5_encoder=bool(pipe.text_encoder_3))
elif 'Flux' in pipe.__class__.__name__:
prompt_embed, positive_pooled = get_weighted_text_embeddings_flux1(pipe=pipe, prompt=prompt, prompt2=prompt_2, device=devices.device)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 2ad204b46..aab35af18 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -16,7 +16,7 @@
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
-from modules.timer import Timer
+from modules.timer import Timer, process as process_timer
from modules.memstats import memory_stats
from modules.modeldata import model_data
from modules.sd_checkpoint import CheckpointInfo, select_checkpoint, list_models, checkpoints_list, checkpoint_titles, get_closet_checkpoint_match, model_hash, update_model_hashes, setup_model, write_metadata, read_metadata_from_safetensors # pylint: disable=unused-import
@@ -512,7 +512,10 @@ def move_model(model, device=None, force=False):
except Exception as e1:
t1 = time.time()
shared.log.error(f'Model move: device={device} {e1}')
- if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 0.1:
+ if 'move' not in process_timer.records:
+ process_timer.records['move'] = 0
+ process_timer.records['move'] += t1 - t0
+ if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 1:
shared.log.debug(f'Model move: device={device} class={model.__class__.__name__} accelerate={getattr(model, "has_accelerate", False)} fn={fn} time={t1-t0:.2f}') # pylint: disable=protected-access
devices.torch_gc()
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 60c75b64e..9f24d5a91 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -80,13 +80,13 @@
'DPM++ Cosine': { 'solver_order': 2, 'sigma_schedule': "exponential", 'prediction_type': "v-prediction" },
'DPM SDE': { 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'noise_sampler_seed': None, 'timestep_spacing': 'linspace', 'steps_offset': 0, },
- 'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True },
- 'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True },
- 'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True },
- 'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True },
- 'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True },
- 'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True },
- 'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True },
+ 'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
'Heun': { 'use_beta_sigmas': False, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'timestep_spacing': 'linspace' },
'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1 },
diff --git a/wiki b/wiki
index 313a6b911..ba7d78b55 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 313a6b911bd239b4fa8092ed89b936428214342e
+Subproject commit ba7d78b55eb95afe8509bd0069b8ec345b259f21
From fccd1ed364e35721c01881684caaad8166294ceb Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Sun, 24 Nov 2024 14:16:33 -0600
Subject: [PATCH 011/162] Enable stepwise LoRA (untested)
---
modules/processing_callbacks.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 52ea3e575..f6e3c0672 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -4,6 +4,7 @@
import torch
import numpy as np
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
+from modules.lora.networks import network_load
p = None
debug_callback = shared.log.trace if os.environ.get('SD_CALLBACK_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -63,6 +64,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
time.sleep(0.1)
if hasattr(p, "stepwise_lora"):
extra_networks.activate(p, p.extra_network_data, step=step)
+ network_load()
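+            # re-applying weights here lets step-wise LoRA strength changes take effect on the next step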
if latents is None:
return kwargs
elif shared.opts.nan_skip:
From bf170ea08c07aa88d99695a01a43f6cfb45e1f22 Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Sun, 24 Nov 2024 23:22:57 -0600
Subject: [PATCH 012/162] Fix multiple LoRA
---
modules/lora/networks.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 762705b67..c6fde3e04 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -354,6 +354,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
+ batch_updown = None
+ batch_ex_bias = None
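+                # accumulate deltas from every loaded network and apply them with a single set_weights() call so multiple LoRAs stack correctly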
for net in loaded_networks:
# default workflow where module is known and has weights
module = net.modules.get(network_layer_name, None)
@@ -362,7 +364,14 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
with devices.inference_context():
weight = self.weight # calculate quant weights once
updown, ex_bias = module.calc_updown(weight)
- set_weights(self, updown, ex_bias)
+ if batch_updown is not None and updown is not None:
+ batch_updown += updown
+ else:
+ batch_updown = updown
+ if batch_ex_bias is not None and ex_bias is not None:
+ batch_ex_bias += ex_bias
+ else:
+ batch_ex_bias = ex_bias
except RuntimeError as e:
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
if debug:
@@ -375,9 +384,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if not loaded_networks: # restore from backup
- t5 = time.time()
- set_weights(self, None, None)
+ set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
t1 = time.time()
timer['apply'] += t1 - t0
From d76365e227fc8734982e5b1d685cf2b18890b0b5 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:01:02 -0500
Subject: [PATCH 013/162] xyz improvements
Signed-off-by: Vladimir Mandic
---
modules/ui_extra_networks.py | 2 ++
scripts/xyz_grid_classes.py | 4 ++--
scripts/xyz_grid_shared.py | 40 ++++++++++++++++--------------------
3 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index f6e6cee97..e152dc74b 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -135,6 +135,7 @@ def patch(self, text: str, tabname: str):
return text.replace('~tabname', tabname)
def create_xyz_grid(self):
+ """
xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module
def add_prompt(p, opt, x):
@@ -150,6 +151,7 @@ def add_prompt(p, opt, x):
opt = xyz_grid.AxisOption(f"[Network] {self.title}", str, add_prompt, choices=lambda: [x["name"] for x in self.items])
if opt not in xyz_grid.axis_options:
xyz_grid.axis_options.append(opt)
+ """
def link_preview(self, filename):
quoted_filename = urllib.parse.quote(filename.replace('\\', '/'))
diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py
index b80b9f13c..06772856c 100644
--- a/scripts/xyz_grid_classes.py
+++ b/scripts/xyz_grid_classes.py
@@ -1,4 +1,4 @@
-from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
+from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
from modules import shared, shared_items, sd_samplers, ipadapter, sd_models, sd_vae, sd_unet
@@ -97,7 +97,7 @@ def __exit__(self, exc_type, exc_value, tb):
AxisOption("[Prompt] Prompt order", str_permutations, apply_order, fmt=format_value_join_list),
AxisOption("[Prompt] Prompt parser", str, apply_setting("prompt_attention"), choices=lambda: ["native", "compel", "xhinker", "a1111", "fixed"]),
AxisOption("[Network] LoRA", str, apply_lora, cost=0.5, choices=list_lora),
- AxisOption("[Network] LoRA strength", float, apply_setting('extra_networks_default_multiplier')),
+ AxisOption("[Network] LoRA strength", float, apply_lora_strength),
AxisOption("[Network] Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]),
AxisOption("[Param] Width", int, apply_field("width")),
AxisOption("[Param] Height", int, apply_field("height")),
diff --git a/scripts/xyz_grid_shared.py b/scripts/xyz_grid_shared.py
index d3ee0a864..82387fab8 100644
--- a/scripts/xyz_grid_shared.py
+++ b/scripts/xyz_grid_shared.py
@@ -63,28 +63,15 @@ def apply_seed(p, x, xs):
def apply_prompt(p, x, xs):
- if not hasattr(p, 'orig_prompt'):
- p.orig_prompt = p.prompt
- p.orig_negative = p.negative_prompt
- if xs[0] not in p.orig_prompt and xs[0] not in p.orig_negative:
- shared.log.warning(f'XYZ grid: prompt S/R string="{xs[0]}" not found')
- else:
- p.prompt = p.orig_prompt.replace(xs[0], x)
- p.negative_prompt = p.orig_negative.replace(xs[0], x)
- p.all_prompts = None
- p.all_negative_prompts = None
- """
- if p.all_prompts is not None:
- for i in range(len(p.all_prompts)):
- for j in range(len(xs)):
- p.all_prompts[i] = p.all_prompts[i].replace(xs[j], x)
- p.negative_prompt = p.negative_prompt.replace(xs[0], x)
- if p.all_negative_prompts is not None:
- for i in range(len(p.all_negative_prompts)):
- for j in range(len(xs)):
- p.all_negative_prompts[i] = p.all_negative_prompts[i].replace(xs[j], x)
- """
- shared.log.debug(f'XYZ grid apply prompt: "{xs[0]}"="{x}"')
+ for s in xs:
+ if s in p.prompt:
+ shared.log.debug(f'XYZ grid apply prompt: "{s}"="{x}"')
+ p.prompt = p.prompt.replace(s, x)
+ if s in p.negative_prompt:
+ shared.log.debug(f'XYZ grid apply negative: "{s}"="{x}"')
+ p.negative_prompt = p.negative_prompt.replace(s, x)
+ p.all_prompts = None
+ p.all_negative_prompts = None
def apply_order(p, x, xs):
@@ -220,6 +207,15 @@ def apply_lora(p, x, xs):
shared.log.debug(f'XYZ grid apply LoRA: "{x}"')
+def apply_lora_strength(p, x, xs):
+ shared.log.debug(f'XYZ grid apply LoRA strength: "{x}"')
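+    # strip explicit strength suffixes from lora tags in the prompt so the updated default multiplier takes effect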
+ p.prompt = p.prompt.replace(':1.0>', '>')
+ p.prompt = p.prompt.replace(f':{shared.opts.extra_networks_default_multiplier}>', '>')
+ p.all_prompts = None
+ p.all_negative_prompts = None
+ shared.opts.data['extra_networks_default_multiplier'] = x
+
+
def apply_te(p, x, xs):
shared.opts.data["sd_text_encoder"] = x
sd_models.reload_text_encoder()
From 172108127da14ab07ac2077b102137bda143deed Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:11:40 -0500
Subject: [PATCH 014/162] xyz improvements
Signed-off-by: Vladimir Mandic
---
scripts/xyz_grid_classes.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py
index 06772856c..cc70d68f8 100644
--- a/scripts/xyz_grid_classes.py
+++ b/scripts/xyz_grid_classes.py
@@ -97,7 +97,7 @@ def __exit__(self, exc_type, exc_value, tb):
AxisOption("[Prompt] Prompt order", str_permutations, apply_order, fmt=format_value_join_list),
AxisOption("[Prompt] Prompt parser", str, apply_setting("prompt_attention"), choices=lambda: ["native", "compel", "xhinker", "a1111", "fixed"]),
AxisOption("[Network] LoRA", str, apply_lora, cost=0.5, choices=list_lora),
- AxisOption("[Network] LoRA strength", float, apply_lora_strength),
+ AxisOption("[Network] LoRA strength", float, apply_lora_strength, cost=0.6),
AxisOption("[Network] Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]),
AxisOption("[Param] Width", int, apply_field("width")),
AxisOption("[Param] Height", int, apply_field("height")),
From 9e56a8f0a742cfe70bc9ef44f47edb546285b661 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:49:02 -0500
Subject: [PATCH 015/162] avoid live-preview during vae-decode
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 7 ++++--
javascript/logger.js | 18 ++++++--------
javascript/notification.js | 50 ++++++++++++++++++++------------------
javascript/progressBar.js | 12 ++++++---
modules/shared_state.py | 5 ++++
5 files changed, 53 insertions(+), 39 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbd29840d..9733dfa2b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-24
+## Update for 2024-11-26
- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
@@ -29,7 +29,7 @@
- Sampler improvements
- update DPM FlowMatch samplers
- UI:
- - browser->server logging
+ - browser->server logging framework
- Fixes:
- update `diffusers`
- fix README links
@@ -37,6 +37,9 @@
- relax settings validator
- improve js progress calls resiliency
- fix text-to-video pipeline
+ - avoid live-preview if vae-decode is running
+ - allow xyz-grid with multi-axis s&r
+ - fix xyz-grid with lora
## Update for 2024-11-21
diff --git a/javascript/logger.js b/javascript/logger.js
index 5aa8face3..1677fa537 100644
--- a/javascript/logger.js
+++ b/javascript/logger.js
@@ -1,5 +1,3 @@
-const serverTimeout = 5000;
-
const log = async (...msg) => {
const dt = new Date();
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
@@ -19,15 +17,15 @@ const error = async (...msg) => {
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
console.error(ts, ...msg); // eslint-disable-line no-console
- const txt = msg.join(' ');
- if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
+ // const txt = msg.join(' ');
+ // if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
};
-const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
const err = (msg) => {
if (!ignore) {
error(`${msg}: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
- if (errorHandler) errorHandler();
+ if (errorHandler) errorHandler(xhrObj);
}
};
@@ -54,15 +52,15 @@ const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined
xhrObj.send(req);
};
-const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
const xhr = new XMLHttpRequest();
const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
xhr.open('GET', `${url}?${args}`, true);
- xhrInternal(xhr, data, handler, errorHandler, ignore);
+ xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
};
-function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false) {
+function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) {
const xhr = new XMLHttpRequest();
xhr.open('POST', url, true);
- xhrInternal(xhr, data, handler, errorHandler, ignore);
+ xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
}
diff --git a/javascript/notification.js b/javascript/notification.js
index 33e8d1c55..c702c90e7 100644
--- a/javascript/notification.js
+++ b/javascript/notification.js
@@ -4,28 +4,32 @@ let lastHeadImg = null;
let notificationButton = null;
async function sendNotification() {
- if (!notificationButton) {
- notificationButton = gradioApp().getElementById('request_notifications');
- if (notificationButton) notificationButton.addEventListener('click', (evt) => Notification.requestPermission(), true);
+ try {
+ if (!notificationButton) {
+ notificationButton = gradioApp().getElementById('request_notifications');
+ if (notificationButton) notificationButton.addEventListener('click', (evt) => Notification.requestPermission(), true);
+ }
+ if (document.hasFocus()) return; // window is in focus so don't send notifications
+ let galleryPreviews = gradioApp().querySelectorAll('div[id^="tab_"][style*="display: block"] div[id$="_results"] .thumbnail-item > img');
+ if (!galleryPreviews || galleryPreviews.length === 0) galleryPreviews = gradioApp().querySelectorAll('.thumbnail-item > img');
+ if (!galleryPreviews || galleryPreviews.length === 0) return;
+ const headImg = galleryPreviews[0]?.src;
+ if (!headImg || headImg === lastHeadImg || headImg.includes('logo-bg-')) return;
+ const audioNotification = gradioApp().querySelector('#audio_notification audio');
+ if (audioNotification) audioNotification.play();
+ lastHeadImg = headImg;
+ const imgs = new Set(Array.from(galleryPreviews).map((img) => img.src)); // Multiple copies of the images are in the DOM when one is selected
+ const notification = new Notification('SD.Next', {
+ body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 's' : ''}`,
+ icon: headImg,
+ image: headImg,
+ });
+ notification.onclick = () => {
+ parent.focus();
+ this.close();
+ };
+ log('sendNotifications');
+ } catch (e) {
+ error(`sendNotification: ${e}`);
}
- if (document.hasFocus()) return; // window is in focus so don't send notifications
- let galleryPreviews = gradioApp().querySelectorAll('div[id^="tab_"][style*="display: block"] div[id$="_results"] .thumbnail-item > img');
- if (!galleryPreviews || galleryPreviews.length === 0) galleryPreviews = gradioApp().querySelectorAll('.thumbnail-item > img');
- if (!galleryPreviews || galleryPreviews.length === 0) return;
- const headImg = galleryPreviews[0]?.src;
- if (!headImg || headImg === lastHeadImg || headImg.includes('logo-bg-')) return;
- const audioNotification = gradioApp().querySelector('#audio_notification audio');
- if (audioNotification) audioNotification.play();
- lastHeadImg = headImg;
- const imgs = new Set(Array.from(galleryPreviews).map((img) => img.src)); // Multiple copies of the images are in the DOM when one is selected
- const notification = new Notification('SD.Next', {
- body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 's' : ''}`,
- icon: headImg,
- image: headImg,
- });
- notification.onclick = () => {
- parent.focus();
- this.close();
- };
- log('sendNotifications');
}
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index 9d897bc87..52f666b3a 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -12,8 +12,10 @@ function formatTime(secs) {
function checkPaused(state) {
lastState.paused = state ? !state : !lastState.paused;
- document.getElementById('txt2img_pause').innerText = lastState.paused ? 'Resume' : 'Pause';
- document.getElementById('img2img_pause').innerText = lastState.paused ? 'Resume' : 'Pause';
+ const t_el = document.getElementById('txt2img_pause');
+ const i_el = document.getElementById('img2img_pause');
+ if (t_el) t_el.innerText = lastState.paused ? 'Resume' : 'Pause';
+ if (i_el) i_el.innerText = lastState.paused ? 'Resume' : 'Pause';
}
function setProgress(res) {
@@ -87,7 +89,9 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
debug('taskEnd:', id_task);
localStorage.removeItem('task');
setProgress();
- if (parentGallery && livePreview) parentGallery.removeChild(livePreview);
+ try {
+ if (parentGallery && livePreview) parentGallery.removeChild(livePreview);
+ } catch { /* ignore */ }
checkPaused(true);
sendNotification();
if (atEnd) atEnd();
@@ -118,7 +122,7 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
done();
};
- xhrPost('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
+ xhrPost('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler, false, 5000);
};
start(id_task, 0);
}
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 7def42b8c..51d33f9ed 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -28,6 +28,9 @@ class State:
oom = False
debug_output = os.environ.get('SD_STATE_DEBUG', None)
+ def __str__(self) -> str:
+ return f'State: job={self.job} {self.job_no}/{self.job_count} step={self.sampling_step}/{self.sampling_steps} skipped={self.skipped} interrupted={self.interrupted} paused={self.paused} info={self.textinfo}'
+
def skip(self):
log.debug('Requested skip')
self.skipped = True
@@ -135,6 +138,8 @@ def end(self, api=None):
modules.devices.torch_gc()
def set_current_image(self):
+ if self.job == 'VAE': # avoid generating preview while vae is running
+ return
from modules.shared import opts, cmd_opts
"""sets self.current_image from self.current_latent if enough sampling steps have been made after the last call to this"""
if cmd_opts.lowvram or self.api:
From 8204f7f8cd49df67e183ef768cda480920ad2777 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:49:25 -0500
Subject: [PATCH 016/162] update ui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index b31453f9d..a3f8a0ec4 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit b31453f9d109456819673e8574162edb70fef73c
+Subproject commit a3f8a0ec45cdc991689ee61ee79626f1b69e7c21
From 23b63098000a444297f52bd20b0eca9b3610e930 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 14:37:25 -0500
Subject: [PATCH 017/162] ui updates
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 ++++
javascript/base.css | 1 -
javascript/black-teal.css | 3 ++-
javascript/extraNetworks.js | 16 ++++++++++++++
javascript/progressBar.js | 38 ++++++++++++++++++--------------
javascript/sdnext.css | 18 +++++++--------
modules/call_queue.py | 11 ++++-----
modules/processing.py | 9 +++-----
modules/processing_callbacks.py | 14 ++++++------
modules/processing_correction.py | 13 +++++++++--
modules/processing_diffusers.py | 1 -
modules/shared_state.py | 4 ++++
modules/timer.py | 6 +++++
modules/txt2img.py | 2 +-
modules/ui_common.py | 13 ++++++++---
modules/ui_extra_networks.py | 2 +-
modules/ui_img2img.py | 8 +++----
17 files changed, 106 insertions(+), 57 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9733dfa2b..170c780a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,10 @@
- Sampler improvements
- update DPM FlowMatch samplers
- UI:
+ - improved stats on generate completion
+ - improved live preview display and performance
+ - improved accordion behavior
+ - auto-size networks height for sidebar
- browser->server logging framework
- Fixes:
- update `diffusers`
diff --git a/javascript/base.css b/javascript/base.css
index 7daa8b2bd..6c18ad7c5 100644
--- a/javascript/base.css
+++ b/javascript/base.css
@@ -25,7 +25,6 @@
.progressDiv .progress { width: 0%; height: 20px; background: #0060df; color: white; font-weight: bold; line-height: 20px; padding: 0 8px 0 0; text-align: right; overflow: visible; white-space: nowrap; padding: 0 0.5em; }
.livePreview { position: absolute; z-index: 50; background-color: transparent; width: -moz-available; width: -webkit-fill-available; }
.livePreview img { position: absolute; object-fit: contain; width: 100%; height: 100%; }
-.dark .livePreview { background-color: rgb(17 24 39 / var(--tw-bg-opacity)); }
.popup-metadata { color: white; background: #0000; display: inline-block; white-space: pre-wrap; font-size: 0.75em; }
/* fullpage image viewer */
diff --git a/javascript/black-teal.css b/javascript/black-teal.css
index c6f266c54..b73f9fdc7 100644
--- a/javascript/black-teal.css
+++ b/javascript/black-teal.css
@@ -108,6 +108,7 @@ fieldset .gr-block.gr-box, label.block span { padding: 0; margin-top: -4px; }
.eta-bar { display: none !important }
.gradio-slider { max-width: 200px; }
.gradio-slider input[type="number"] { background: var(--neutral-950); margin-top: 2px; }
+.gradio-image { height: unset !important; }
svg.feather.feather-image, .feather .feather-image { display: none }
.gap-2 { padding-top: 8px; }
.gr-box > div > div > input.gr-text-input { right: 0; width: 4em; padding: 0; top: -12px; border: none; max-height: 20px; }
@@ -134,7 +135,7 @@ svg.feather.feather-image, .feather .feather-image { display: none }
.gallery-item { box-shadow: none !important; }
.performance { color: #888; }
.extra-networks { border-left: 2px solid var(--highlight-color) !important; padding-left: 4px; }
-.image-buttons { gap: 10px !important; justify-content: center; }
+.image-buttons { justify-content: center; gap: 0 !important; }
.image-buttons > button { max-width: 160px; }
.tooltip { background: var(--primary-300); color: black; border: none; border-radius: var(--radius-lg) }
#system_row > button, #settings_row > button, #config_row > button { max-width: 10em; }
diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js
index 622e40faf..1d1bcfb24 100644
--- a/javascript/extraNetworks.js
+++ b/javascript/extraNetworks.js
@@ -434,6 +434,22 @@ function setupExtraNetworksForTab(tabname) {
};
}
+ // auto-resize networks sidebar
+ const resizeObserver = new ResizeObserver((entries) => {
+ for (const entry of entries) {
+ for (const el of Array.from(gradioApp().getElementById(`${tabname}_extra_tabs`).querySelectorAll('.extra-networks-page'))) {
+ const h = Math.trunc(entry.contentRect.height);
+ if (h <= 0) return;
+ if (window.opts.extra_networks_card_cover === 'sidebar' && window.opts.theme_type === 'Standard') el.style.height = `max(55vh, ${h - 90}px)`;
+ // log(`${tabname} height: ${entry.target.id}=${h} ${el.id}=${el.clientHeight}`);
+ }
+ }
+ });
+ const settingsEl = gradioApp().getElementById(`${tabname}_settings`);
+ const interfaceEl = gradioApp().getElementById(`${tabname}_interface`);
+ if (settingsEl) resizeObserver.observe(settingsEl);
+ if (interfaceEl) resizeObserver.observe(interfaceEl);
+
// en style
if (!en) return;
let lastView;
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index 52f666b3a..ff9be4666 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -68,29 +68,33 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
let img;
const initLivePreview = () => {
+ if (!parentGallery) return;
+ const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
+ for (const footer of footers) footer.style.display = 'none'; // remove all footers
+
+ livePreview = document.createElement('div');
+ livePreview.className = 'livePreview';
+ parentGallery.insertBefore(livePreview, galleryEl);
img = new Image();
- if (parentGallery) {
- livePreview = document.createElement('div');
- livePreview.className = 'livePreview';
- parentGallery.insertBefore(livePreview, galleryEl);
- const rect = galleryEl.getBoundingClientRect();
- if (rect.width) {
- livePreview.style.width = `${rect.width}px`;
- livePreview.style.height = `${rect.height}px`;
- }
- img.onload = () => {
- livePreview.appendChild(img);
- if (livePreview.childElementCount > 2) livePreview.removeChild(livePreview.firstElementChild);
- };
- }
+ img.id = 'livePreviewImage';
+ livePreview.appendChild(img);
+ img.onload = () => {
+ img.style.width = `min(100%, max(${img.naturalWidth}px, 512px))`;
+ parentGallery.style.minHeight = `${img.height}px`;
+ };
};
const done = () => {
debug('taskEnd:', id_task);
localStorage.removeItem('task');
setProgress();
+ const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
+ for (const footer of footers) footer.style.display = 'flex'; // remove all footers
try {
- if (parentGallery && livePreview) parentGallery.removeChild(livePreview);
+ if (parentGallery && livePreview) {
+ parentGallery.removeChild(livePreview);
+ parentGallery.style.minHeight = 'unset';
+ }
} catch { /* ignore */ }
checkPaused(true);
sendNotification();
@@ -112,7 +116,9 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
}
setProgress(res);
if (res.live_preview && !livePreview) initLivePreview();
- if (res.live_preview && galleryEl) img.src = res.live_preview;
+ if (res.live_preview && galleryEl) {
+ if (img.src !== res.live_preview) img.src = res.live_preview;
+ }
if (onProgress) onProgress(res);
setTimeout(() => start(id_task, id_live_preview), opts.live_preview_refresh_period || 500);
};
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index 08fae2eb8..240b7492f 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -16,7 +16,7 @@ tr { border-bottom: none !important; padding: 0 0.5em !important; }
td > div > span { overflow-y: auto; max-height: 3em; overflow-x: hidden; }
textarea { overflow-y: auto !important; }
span { font-size: var(--text-md) !important; }
-button { font-size: var(--text-lg) !important; }
+button { font-size: var(--text-lg) !important; min-width: unset !important; }
input[type='color'] { width: 64px; height: 32px; }
input::-webkit-outer-spin-button, input::-webkit-inner-spin-button { margin-left: 4px; }
@@ -83,13 +83,14 @@ button.custom-button { border-radius: var(--button-large-radius); padding: var(-
.block.token-counter div{ display: inline; }
.block.token-counter span{ padding: 0.1em 0.75em; }
.performance { font-size: var(--text-xs); color: #444; }
-.performance p { display: inline-block; color: var(--body-text-color-subdued) !important }
+.performance p { display: inline-block; color: var(--primary-500) !important }
.performance .time { margin-right: 0; }
.thumbnails { background: var(--body-background-fill); }
-.control-image { height: calc(100vw/3) !important; }
.prompt textarea { resize: vertical; }
+.image-container { height: unset !important; }
+.control-image { height: unset !important; }
+.grid-wrap { overflow-y: auto !important; }
#control_results { margin: 0; padding: 0; }
-#control_gallery { height: calc(100vw/3 + 60px); }
#txt2img_gallery, #img2img_gallery { height: 50vh; }
#control-result { background: var(--button-secondary-background-fill); padding: 0.2em; }
#control-inputs { margin-top: 1em; }
@@ -122,7 +123,7 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
#img2img_sketch, #img2maskimg, #inpaint_sketch { overflow: overlay !important; resize: auto; background: var(--panel-background-fill); z-index: 5; }
.image-buttons button { min-width: auto; }
.infotext { overflow-wrap: break-word; line-height: 1.5em; font-size: 0.95em !important; }
-.infotext > p { padding-left: 1em; text-indent: -1em; white-space: pre-wrap; color: var(--block-info-text-color) !important; }
+.infotext > p { white-space: pre-wrap; color: var(--block-info-text-color) !important; }
.tooltip { display: block; position: fixed; top: 1em; right: 1em; padding: 0.5em; background: var(--input-background-fill); color: var(--body-text-color); border: 1pt solid var(--button-primary-border-color);
width: 22em; min-height: 1.3em; font-size: var(--text-xs); transition: opacity 0.2s ease-in; pointer-events: none; opacity: 0; z-index: 999; }
.tooltip-show { opacity: 0.9; }
@@ -158,11 +159,10 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
.progressDiv { position: relative; height: 20px; background: #b4c0cc; margin-bottom: -3px; }
.dark .progressDiv { background: #424c5b; }
.progressDiv .progress { width: 0%; height: 20px; background: #0060df; color: white; font-weight: bold; line-height: 20px; padding: 0 8px 0 0; text-align: right; overflow: visible; white-space: nowrap; padding: 0 0.5em; }
-.livePreview { position: absolute; z-index: 50; background-color: transparent; width: -moz-available; width: -webkit-fill-available; }
-.livePreview img { position: absolute; object-fit: contain; width: 100%; height: 100%; }
-.dark .livePreview { background-color: rgb(17 24 39 / var(--tw-bg-opacity)); }
+.livePreview { position: absolute; z-index: 50; width: -moz-available; width: -webkit-fill-available; height: 100%; background-color: var(--background-color); }
+.livePreview img { object-fit: contain; width: 100%; justify-self: center; }
.popup-metadata { color: white; background: #0000; display: inline-block; white-space: pre-wrap; font-size: var(--text-xxs); }
-
+.generating { animation: unset !important; border: unset !important; }
/* fullpage image viewer */
#lightboxModal { display: none; position: fixed; z-index: 1001; left: 0; top: 0; width: 100%; height: 100%; overflow: hidden; background-color: rgba(20, 20, 20, 0.75); backdrop-filter: blur(6px);
user-select: none; -webkit-user-select: none; flex-direction: row; font-family: 'NotoSans';}
diff --git a/modules/call_queue.py b/modules/call_queue.py
index 4065d13d9..cdc2fe1f7 100644
--- a/modules/call_queue.py
+++ b/modules/call_queue.py
@@ -2,7 +2,7 @@
import threading
import time
import cProfile
-from modules import shared, progress, errors
+from modules import shared, progress, errors, timer
queue_lock = threading.Lock()
@@ -73,15 +73,16 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs):
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"{elapsed_m}m {elapsed_s:.2f}s" if elapsed_m > 0 else f"{elapsed_s:.2f}s"
+ summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
vram_html = ''
if not shared.mem_mon.disabled:
vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
+ used = round(100 * vram['used'] / (vram['total'] + 0.001))
if vram.get('active_peak', 0) > 0:
- vram_html = " | "
- vram_html += f"GPU active {max(vram['active_peak'], vram['reserved_peak'])} MB reserved {vram['reserved']} | used {vram['used']} MB free {vram['free']} MB total {vram['total']} MB"
+ vram_html = " | "
+ vram_html += f"GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
-            vram_html += ""
if isinstance(res, list):
- res[-1] += f""
+ res[-1] += f""
return tuple(res)
return f
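Note: the byte-to-MB conversion and the new used-percentage figure in the hunk above rely on two compact idioms; a minimal standalone sketch (with made-up example values, not part of the patch) of what they compute:

```python
# Illustrative sketch of the VRAM math used above (example byte values are made up).
# -(v // -n) is ceiling division, so byte counts round up to whole MB;
# the +0.001 in the denominator guards against division by a zero total.
vram_bytes = {'used': 5_368_709_120, 'total': 8_589_934_592}       # example readings
vram = {k: -(v // -(1024 * 1024)) for k, v in vram_bytes.items()}  # ceil to MB -> {'used': 5120, 'total': 8192}
used = round(100 * vram['used'] / (vram['total'] + 0.001))         # -> 62
print(f"GPU {vram['used']} MB {used}%")                            # -> GPU 5120 MB 62%
```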
diff --git a/modules/processing.py b/modules/processing.py
index 0d557e64e..16e7a9213 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -323,7 +323,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
processed = p.scripts.process_images(p)
if processed is not None:
samples = processed.images
- infotexts = processed.infotexts
+ infotexts += processed.infotexts
if samples is None:
if not shared.native:
from modules.processing_original import process_original
@@ -393,11 +393,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if shared.opts.mask_apply_overlay:
image = apply_overlay(image, p.paste_to, i, p.overlay_images)
- if len(infotexts) > i:
- info = infotexts[i]
- else:
- info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i, all_negative_prompts=p.negative_prompts)
- infotexts.append(info)
+ info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i, all_negative_prompts=p.negative_prompts)
+ infotexts.append(info)
image.info["parameters"] = info
output_images.append(image)
if shared.opts.samples_save and not p.do_not_save_samples and p.outpath_samples is not None:
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 52ea3e575..7d6c8ec04 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -6,7 +6,8 @@
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
p = None
-debug_callback = shared.log.trace if os.environ.get('SD_CALLBACK_DEBUG', None) is not None else lambda *args, **kwargs: None
+debug = os.environ.get('SD_CALLBACK_DEBUG', None) is not None
+debug_callback = shared.log.trace if debug else lambda *args, **kwargs: None
def set_callbacks_p(processing):
@@ -50,7 +51,8 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
if p is None:
return kwargs
latents = kwargs.get('latents', None)
- debug_callback(f'Callback: step={step} timestep={timestep} latents={latents.shape if latents is not None else None} kwargs={list(kwargs)}')
+ if debug:
+ debug_callback(f'Callback: step={step} timestep={timestep} latents={latents.shape if latents is not None else None} kwargs={list(kwargs)}')
order = getattr(pipe.scheduler, "order", 1) if hasattr(pipe, 'scheduler') else 1
shared.state.sampling_step = step // order
if shared.state.interrupted or shared.state.skipped:
@@ -67,7 +69,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
return kwargs
elif shared.opts.nan_skip:
assert not torch.isnan(latents[..., 0, 0]).all(), f'NaN detected at step {step}: Skipping...'
- if len(getattr(p, 'ip_adapter_names', [])) > 0:
+ if len(getattr(p, 'ip_adapter_names', [])) > 0 and p.ip_adapter_names[0] != 'None':
ip_adapter_scales = list(p.ip_adapter_scales)
ip_adapter_starts = list(p.ip_adapter_starts)
ip_adapter_ends = list(p.ip_adapter_ends)
@@ -78,7 +80,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
debug_callback(f"Callback: IP Adapter scales={ip_adapter_scales}")
pipe.set_ip_adapter_scale(ip_adapter_scales)
if step != getattr(pipe, 'num_timesteps', 0):
- kwargs = processing_correction.correction_callback(p, timestep, kwargs)
+ kwargs = processing_correction.correction_callback(p, timestep, kwargs, initial=step == 0)
kwargs = prompt_callback(step, kwargs) # monkey patch for diffusers callback issues
if step == int(getattr(pipe, 'num_timesteps', 100) * p.cfg_end) and 'prompt_embeds' in kwargs and 'negative_prompt_embeds' in kwargs:
if "PAG" in shared.sd_model.__class__.__name__:
@@ -105,7 +107,5 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
if shared.cmd_opts.profile and shared.profiler is not None:
shared.profiler.step()
t1 = time.time()
- if 'callback' not in timer.process.records:
- timer.process.records['callback'] = 0
- timer.process.records['callback'] += t1 - t0
+ timer.process.add('callback', t1 - t0)
return kwargs
diff --git a/modules/processing_correction.py b/modules/processing_correction.py
index e715d8c49..050fae889 100644
--- a/modules/processing_correction.py
+++ b/modules/processing_correction.py
@@ -7,9 +7,11 @@
import torch
from modules import shared, sd_vae_taesd, devices
+
debug_enabled = os.environ.get('SD_HDR_DEBUG', None) is not None
debug = shared.log.trace if debug_enabled else lambda *args, **kwargs: None
debug('Trace: HDR')
+skip_correction = False
def sharpen_tensor(tensor, ratio=0):
@@ -116,8 +118,15 @@ def correction(p, timestep, latent):
return latent
-def correction_callback(p, timestep, kwargs):
- if not any([p.hdr_clamp, p.hdr_mode, p.hdr_maximize, p.hdr_sharpen, p.hdr_color, p.hdr_brightness, p.hdr_tint_ratio]):
+def correction_callback(p, timestep, kwargs, initial: bool = False):
+ global skip_correction # pylint: disable=global-statement
+ if initial:
+ if not any([p.hdr_clamp, p.hdr_mode, p.hdr_maximize, p.hdr_sharpen, p.hdr_color, p.hdr_brightness, p.hdr_tint_ratio]):
+ skip_correction = True
+ return kwargs
+ else:
+ skip_correction = False
+ elif skip_correction:
return kwargs
latents = kwargs["latents"]
if debug_enabled:
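Note: the gist of the `skip_correction` change above is to evaluate the HDR flags once on the initial callback step and short-circuit every later step. A minimal sketch of that caching pattern, with simplified names rather than the project's exact code:

```python
# Minimal sketch of the pattern introduced above (names simplified, illustrative only).
_skip = False  # module-level cache, mirrors skip_correction

def correction_callback(p, timestep, kwargs, initial=False):
    global _skip
    if initial:  # first step: decide once whether any HDR correction is requested
        _skip = not any([p.hdr_clamp, p.hdr_mode, p.hdr_maximize, p.hdr_sharpen,
                         p.hdr_color, p.hdr_brightness, p.hdr_tint_ratio])
    if _skip:    # later steps reuse the cached decision instead of re-checking the flags
        return kwargs
    # ... apply corrections to kwargs["latents"] here ...
    return kwargs
```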
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 44dff811b..7b91fcd42 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -75,7 +75,6 @@ def process_base(p: processing.StableDiffusionProcessing):
clip_skip=p.clip_skip,
desc='Base',
)
- timer.process.record('args')
shared.state.sampling_steps = base_args.get('prior_num_inference_steps', None) or p.steps or base_args.get('num_inference_steps', None)
if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1:
p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 51d33f9ed..0173b540c 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -2,6 +2,7 @@
import time
import datetime
from modules.errors import log
+from modules import timer
class State:
@@ -150,6 +151,7 @@ def set_current_image(self):
def do_set_current_image(self):
if self.current_latent is None:
return
+ t0 = time.time()
from modules.shared import opts
import modules.sd_samplers # pylint: disable=W0621
try:
@@ -159,6 +161,8 @@ def do_set_current_image(self):
except Exception:
# log.error(f'Error setting current image: step={self.sampling_step} {e}')
pass
+ t1 = time.time()
+ timer.process.add('preview', t1 - t0)
def assign_current_image(self, image):
self.current_image = image
diff --git a/modules/timer.py b/modules/timer.py
index 8a5db726d..7657ac8e8 100644
--- a/modules/timer.py
+++ b/modules/timer.py
@@ -15,6 +15,12 @@ def elapsed(self, reset=True):
self.start = end
return res
+ def add(self, name, t):
+ if name not in self.records:
+ self.records[name] = t
+ else:
+ self.records[name] += t
+
def record(self, category=None, extra_time=0, reset=True):
e = self.elapsed(reset)
if category is None:
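Note: the new `Timer.add` helper lets callers accumulate time into a named record without the previous init-or-increment boilerplate (see the callback and preview changes above). A rough usage sketch, assuming the shared `timer.process` instance:

```python
# Rough usage sketch of Timer.add (assumes the shared `timer.process` instance).
import time
from modules import timer

t0 = time.time()
# ... some repeated per-step work, e.g. decoding a live preview ...
timer.process.add('preview', time.time() - t0)            # creates or increments the 'preview' record
print(timer.process.summary(min_time=0.1, total=False))   # aggregated totals shown in the stats line
```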
diff --git a/modules/txt2img.py b/modules/txt2img.py
index 2f0e2f4b3..e82c744a2 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -88,7 +88,7 @@ def txt2img(id_task, state,
p.scripts = scripts.scripts_txt2img
p.script_args = args
p.state = state
- processed = scripts.scripts_txt2img.run(p, *args)
+ processed: processing.Processed = scripts.scripts_txt2img.run(p, *args)
if processed is None:
processed = processing.process_images(p)
processed = scripts.scripts_txt2img.after(p, processed, *args)
diff --git a/modules/ui_common.py b/modules/ui_common.py
index 9c4bb5cdc..e21033718 100644
--- a/modules/ui_common.py
+++ b/modules/ui_common.py
@@ -245,10 +245,17 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None):
gr.HTML(value="", elem_id="main_info", visible=False, elem_classes=["main-info"])
# columns are for <576px, <768px, <992px, <1200px, <1400px, >1400px
result_gallery = gr.Gallery(value=[],
- label='Output', show_label=False, show_download_button=True, allow_preview=True, container=False, preview=preview,
- columns=4, object_fit='scale-down', height=height,
+ label='Output',
+ show_label=False,
+ show_download_button=True,
+ allow_preview=True,
+ container=False,
+ preview=preview,
+ columns=4,
+ object_fit='scale-down',
+ height=height,
elem_id=f"{tabname}_gallery",
- )
+ )
if prompt is not None:
interrogate_clip_btn, interrogate_booru_btn = ui_sections.create_interrogate_buttons('control')
interrogate_clip_btn.click(fn=interrogate_clip, inputs=[result_gallery], outputs=[prompt])
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index e152dc74b..c326219df 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -16,7 +16,7 @@
import gradio as gr
from PIL import Image
from starlette.responses import FileResponse, JSONResponse
-from modules import paths, shared, scripts, files_cache, errors, infotext
+from modules import paths, shared, files_cache, errors, infotext
from modules.ui_components import ToolButton
import modules.ui_symbols as symbols
diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py
index 22c89dac8..046c181ce 100644
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@@ -68,20 +68,20 @@ def add_copy_image_controls(tab_name, elem):
img2img_selected_tab = gr.State(0) # pylint: disable=abstract-class-instantiated
state = gr.Textbox(value='', visible=False)
with gr.TabItem('Image', id='img2img', elem_id="img2img_img2img_tab") as tab_img2img:
- init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512)
+ init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA")
interrogate_clip, interrogate_booru = ui_sections.create_interrogate_buttons('img2img')
add_copy_image_controls('img2img', init_img)
with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_img2img_sketch_tab") as tab_sketch:
- sketch = gr.Image(label="Image for img2img", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ sketch = gr.Image(label="Image for img2img", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
add_copy_image_controls('sketch', sketch)
with gr.TabItem('Inpaint', id='inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
- init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA", height=512)
+ init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA")
add_copy_image_controls('inpaint', init_img_with_mask)
with gr.TabItem('Composite', id='inpaint_sketch', elem_id="img2img_inpaint_sketch_tab") as tab_inpaint_color:
- inpaint_color_sketch = gr.Image(label="Color sketch inpainting", show_label=False, elem_id="inpaint_sketch", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ inpaint_color_sketch = gr.Image(label="Color sketch inpainting", show_label=False, elem_id="inpaint_sketch", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
inpaint_color_sketch_orig = gr.State(None) # pylint: disable=abstract-class-instantiated
add_copy_image_controls('inpaint_sketch', inpaint_color_sketch)
From 58146c0fc7851e44a15f66fda4a381b914454442 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 14:42:46 -0500
Subject: [PATCH 018/162] correct preview stats
Signed-off-by: Vladimir Mandic
---
modules/sd_samplers_common.py | 6 +++++-
modules/shared_state.py | 4 ----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index a487fe9b7..f6f6c18d5 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -1,9 +1,10 @@
+import time
import threading
from collections import namedtuple
import torch
import torchvision.transforms as T
from PIL import Image
-from modules import shared, devices, processing, images, sd_vae_approx, sd_vae_taesd, sd_vae_stablecascade, sd_samplers
+from modules import shared, devices, processing, images, sd_vae_approx, sd_vae_taesd, sd_vae_stablecascade, sd_samplers, timer
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
@@ -33,6 +34,7 @@ def setup_img2img_steps(p, steps=None):
def single_sample_to_image(sample, approximation=None):
with queue_lock:
+ t0 = time.time()
sd_cascade = False
if approximation is None:
approximation = approximation_indexes.get(shared.opts.show_progress_type, None)
@@ -84,6 +86,8 @@ def single_sample_to_image(sample, approximation=None):
except Exception as e:
warn_once(f'Preview: {e}')
image = Image.new(mode="RGB", size=(512, 512))
+ t1 = time.time()
+ timer.process.add('preview', t1 - t0)
return image
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 0173b540c..51d33f9ed 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -2,7 +2,6 @@
import time
import datetime
from modules.errors import log
-from modules import timer
class State:
@@ -151,7 +150,6 @@ def set_current_image(self):
def do_set_current_image(self):
if self.current_latent is None:
return
- t0 = time.time()
from modules.shared import opts
import modules.sd_samplers # pylint: disable=W0621
try:
@@ -161,8 +159,6 @@ def do_set_current_image(self):
except Exception:
# log.error(f'Error setting current image: step={self.sampling_step} {e}')
pass
- t1 = time.time()
- timer.process.add('preview', t1 - t0)
def assign_current_image(self, image):
self.current_image = image
From 4468144031d7c28c35c609c19889ca433a673c47 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 19:41:07 -0500
Subject: [PATCH 019/162] update modernui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index a3f8a0ec4..ea7062d27 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit a3f8a0ec45cdc991689ee61ee79626f1b69e7c21
+Subproject commit ea7062d27d11ee50a0f22a34753f81a9c9e9d57c
From 14c8414025aeafe65d1ff579589ddf9a45b5fb52 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 21:02:46 -0500
Subject: [PATCH 020/162] update stats and previews
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++
extensions-builtin/sdnext-modernui | 2 +-
javascript/progressBar.js | 6 +++-
modules/control/run.py | 7 ++--
modules/ui_common.py | 1 +
modules/ui_control.py | 57 ++++++++++++++++++++++--------
6 files changed, 54 insertions(+), 22 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 170c780a5..3ff4f0944 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,9 @@
- improved live preview display and performance
- improved accordion behavior
- auto-size networks height for sidebar
+ - control: hide preview column by default
+ - control: option to hide input column
+ - control: add stats
- browser->server logging framework
- Fixes:
- update `diffusers`
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index ea7062d27..3008cee4b 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit ea7062d27d11ee50a0f22a34753f81a9c9e9d57c
+Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index ff9be4666..c385fe5db 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -71,6 +71,8 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
if (!parentGallery) return;
const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
for (const footer of footers) footer.style.display = 'none'; // remove all footers
+ const galleries = Array.from(gradioApp().querySelectorAll('.gallery_main'));
+      for (const gallery of galleries) gallery.style.display = 'none'; // hide all galleries
livePreview = document.createElement('div');
livePreview.className = 'livePreview';
@@ -89,7 +91,9 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
localStorage.removeItem('task');
setProgress();
const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
- for (const footer of footers) footer.style.display = 'flex'; // remove all footers
+ for (const footer of footers) footer.style.display = 'flex'; // restore all footers
+ const galleries = Array.from(gradioApp().querySelectorAll('.gallery_main'));
+      for (const gallery of galleries) gallery.style.display = 'flex'; // restore all galleries
try {
if (parentGallery && livePreview) {
parentGallery.removeChild(livePreview);
diff --git a/modules/control/run.py b/modules/control/run.py
index 5d6343c98..88dddc213 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -717,14 +717,11 @@ def set_pipe():
shared.log.error(f'Control pipeline failed: type={unit_type} units={len(active_model)} error={e}')
errors.display(e, 'Control')
- t_end = time.time()
-
if len(output_images) == 0:
output_images = None
image_txt = '| Images None'
else:
- image_str = [f'{image.width}x{image.height}' for image in output_images]
- image_txt = f'| Time {t_end-t_start:.2f}s | Images {len(output_images)} | Size {" ".join(image_str)}'
+ image_txt = ''
p.init_images = output_images # may be used for hires
if video_type != 'None' and isinstance(output_images, list):
@@ -738,7 +735,7 @@ def set_pipe():
restore_pipeline()
debug(f'Ready: {image_txt}')
-    html_txt = f'Ready {image_txt}'
+    html_txt = f'Ready {image_txt}' if image_txt != '' else ''
if len(info_txt) > 0:
html_txt = html_txt + infotext_to_html(info_txt[0])
if is_generator:
diff --git a/modules/ui_common.py b/modules/ui_common.py
index e21033718..3e7c68bec 100644
--- a/modules/ui_common.py
+++ b/modules/ui_common.py
@@ -255,6 +255,7 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None):
object_fit='scale-down',
height=height,
elem_id=f"{tabname}_gallery",
+ elem_classes=["gallery_main"],
)
if prompt is not None:
interrogate_clip_btn, interrogate_booru_btn = ui_sections.create_interrogate_buttons('control')
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 0bf070036..f4329663a 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -9,7 +9,7 @@
from modules.control.units import lite # vislearn ControlNet-XS
from modules.control.units import t2iadapter # TencentARC T2I-Adapter
from modules.control.units import reference # reference pipeline
-from modules import errors, shared, progress, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts, masking, images, processing_vae # pylint: disable=ungrouped-imports
+from modules import errors, shared, progress, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts, masking, images, processing_vae, timer # pylint: disable=ungrouped-imports
from modules import ui_control_helpers as helpers
@@ -21,13 +21,36 @@
debug('Trace: CONTROL')
-def return_controls(res):
+def return_stats(t: float = None):
+ if t is None:
+ elapsed_text = ''
+ else:
+ elapsed = time.perf_counter() - t
+ elapsed_m = int(elapsed // 60)
+ elapsed_s = elapsed % 60
+ elapsed_text = f"Time: {elapsed_m}m {elapsed_s:.2f}s |" if elapsed_m > 0 else f"Time: {elapsed_s:.2f}s |"
+ summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
+ vram_html = ''
+ if not shared.mem_mon.disabled:
+ vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
+ used = round(100 * vram['used'] / (vram['total'] + 0.001))
+ if vram.get('active_peak', 0) > 0:
+ vram_html += f"| GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
+ vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ return f""
+
+
+def return_controls(res, t: float = None):
# return preview, image, video, gallery, text
debug(f'Control received: type={type(res)} {res}')
+ if t is None:
+ perf = ''
+ else:
+ perf = return_stats(t)
if res is None: # no response
- return [None, None, None, None, '']
+ return [None, None, None, None, '', perf]
elif isinstance(res, str): # error response
- return [None, None, None, None, res]
+ return [None, None, None, None, res, perf]
elif isinstance(res, tuple): # standard response received as tuple via control_run->yield(output_images, process_image, result_txt)
preview_image = res[1] # may be None
output_image = res[0][0] if isinstance(res[0], list) else res[0] # may be image or list of images
@@ -37,9 +60,9 @@ def return_controls(res):
output_gallery = [res[0]] if res[0] is not None else [] # must return list, but can receive single image
result_txt = res[2] if len(res) > 2 else '' # do we have a message
output_video = res[3] if len(res) > 3 else None # do we have a video filename
- return [preview_image, output_image, output_video, output_gallery, result_txt]
+ return [preview_image, output_image, output_video, output_gallery, result_txt, perf]
else: # unexpected
- return [None, None, None, None, f'Control: Unexpected response: {type(res)}']
+ return [None, None, None, None, f'Control: Unexpected response: {type(res)}', perf]
def get_units(*values):
@@ -67,17 +90,18 @@ def generate_click(job_id: str, state: str, active_tab: str, *args):
shared.state.begin('Generate')
progress.add_task_to_queue(job_id)
with call_queue.queue_lock:
- yield [None, None, None, None, 'Control: starting']
+ yield [None, None, None, None, 'Control: starting', '']
shared.mem_mon.reset()
progress.start_task(job_id)
try:
+ t = time.perf_counter()
for results in control_run(state, units, helpers.input_source, helpers.input_init, helpers.input_mask, active_tab, True, *args):
progress.record_results(job_id, results)
- yield return_controls(results)
+ yield return_controls(results, t)
except Exception as e:
shared.log.error(f"Control exception: {e}")
errors.display(e, 'Control')
- yield [None, None, None, None, f'Control: Exception: {e}']
+ yield [None, None, None, None, f'Control: Exception: {e}', '']
progress.finish_task(job_id)
shared.state.end()
@@ -106,7 +130,8 @@ def create_ui(_blocks: gr.Blocks=None):
with gr.Accordion(open=False, label="Input", elem_id="control_input", elem_classes=["small-accordion"]):
with gr.Row():
- show_preview = gr.Checkbox(label="Show preview", value=True, elem_id="control_show_preview")
+ show_input = gr.Checkbox(label="Show input", value=True, elem_id="control_show_input")
+ show_preview = gr.Checkbox(label="Show preview", value=False, elem_id="control_show_preview")
with gr.Row():
input_type = gr.Radio(label="Input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
with gr.Row():
@@ -153,13 +178,13 @@ def create_ui(_blocks: gr.Blocks=None):
override_settings = ui_common.create_override_inputs('control')
with gr.Row(variant='compact', elem_id="control_extra_networks", elem_classes=["extra_networks_root"], visible=False) as extra_networks_ui:
- from modules import timer, ui_extra_networks
+ from modules import ui_extra_networks
extra_networks_ui = ui_extra_networks.create_ui(extra_networks_ui, btn_extra, 'control', skip_indexing=shared.opts.extra_network_skip_indexing)
timer.startup.record('ui-networks')
with gr.Row(elem_id='control-inputs'):
- with gr.Column(scale=9, elem_id='control-input-column', visible=True) as _column_input:
- gr.HTML('Control input
')
+ with gr.Column(scale=9, elem_id='control-input-column', visible=True) as column_input:
+ gr.HTML('Input')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-input'):
with gr.Tab('Image', id='in-image') as tab_image:
input_mode = gr.Label(value='select', visible=False)
@@ -190,12 +215,12 @@ def create_ui(_blocks: gr.Blocks=None):
gr.HTML('Output')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-output') as output_tabs:
with gr.Tab('Gallery', id='out-gallery'):
- output_gallery, _output_gen_info, _output_html_info, _output_html_info_formatted, _output_html_log = ui_common.create_output_panel("control", preview=True, prompt=prompt, height=gr_height)
+ output_gallery, _output_gen_info, _output_html_info, _output_html_info_formatted, output_html_log = ui_common.create_output_panel("control", preview=True, prompt=prompt, height=gr_height)
with gr.Tab('Image', id='out-image'):
output_image = gr.Image(label="Output", show_label=False, type="pil", interactive=False, tool="editor", height=gr_height, elem_id='control_output_image', elem_classes=['control-image'])
with gr.Tab('Video', id='out-video'):
output_video = gr.Video(label="Output", show_label=False, height=gr_height, elem_id='control_output_video', elem_classes=['control-image'])
- with gr.Column(scale=9, elem_id='control-preview-column', visible=True) as column_preview:
+ with gr.Column(scale=9, elem_id='control-preview-column', visible=False) as column_preview:
gr.HTML('Preview')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-preview'):
with gr.Tab('Preview', id='preview-image') as _tab_preview:
@@ -498,6 +523,7 @@ def create_ui(_blocks: gr.Blocks=None):
btn_update = gr.Button('Update', interactive=True, visible=False, elem_id='control_update')
btn_update.click(fn=get_units, inputs=controls, outputs=[], show_progress=True, queue=False)
+ show_input.change(fn=lambda x: gr.update(visible=x), inputs=[show_input], outputs=[column_input])
show_preview.change(fn=lambda x: gr.update(visible=x), inputs=[show_preview], outputs=[column_preview])
input_type.change(fn=lambda x: gr.update(visible=x == 2), inputs=[input_type], outputs=[column_init])
btn_prompt_counter.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[prompt, steps], outputs=[prompt_counter])
@@ -550,6 +576,7 @@ def create_ui(_blocks: gr.Blocks=None):
output_video,
output_gallery,
result_txt,
+ output_html_log,
]
control_dict = dict(
fn=generate_click,
From 49e6c1564c6821808e5ba49c93dd06feadd23979 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 26 Nov 2024 13:13:04 -0500
Subject: [PATCH 021/162] add style aligned
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 47 +++--
modules/processing_class.py | 4 +-
modules/processing_helpers.py | 4 +-
modules/sd_samplers.py | 2 +
modules/style_aligned/inversion.py | 124 ++++++++++++
modules/style_aligned/sa_handler.py | 281 ++++++++++++++++++++++++++++
scripts/style_aligned.py | 117 ++++++++++++
7 files changed, 559 insertions(+), 20 deletions(-)
create mode 100644 modules/style_aligned/inversion.py
create mode 100644 modules/style_aligned/sa_handler.py
create mode 100644 scripts/style_aligned.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ff4f0944..8183fbbc8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,11 +2,13 @@
## Update for 2024-11-26
-- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
+### New models and integrations
+
+- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/)
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
**Canny** & **Depth** are optimized versions of *Flux-dev* for their respective tasks: they are *not* ControlNets that work on top of a model
- To use, go to image or control interface and select *Flux Tools* in scripts
- All models are auto-downloaded on first use
+ to use, go to image or control interface and select *Flux Tools* in scripts
+ all models are auto-downloaded on first use
*note*: All models are [gated](https://github.com/vladmandic/automatic/wiki/Gated) and require acceptance of terms and conditions via web page
*recommended*: Enable on-the-fly [quantization](https://github.com/vladmandic/automatic/wiki/Quantization) or [compression](https://github.com/vladmandic/automatic/wiki/NNCF-Compression) to reduce resource usage
*todo*: support for Canny/Depth LoRAs
@@ -19,16 +21,23 @@
*recommended*: guidance scale 30
- [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
*recommended*: guidance scale 10
-- Model loader improvements:
+- [Style Aligned Image Generation](https://style-aligned-gen.github.io/)
+ enable in scripts, compatible with sd-xl
+  enter multiple prompts in the prompt field, separated by newlines
+  style-aligned applies selected attention layers uniformly to all images to achieve consistency
+  can be used with or without an input image, in which case the first prompt establishes the baseline
+  *note:* all prompts are processed as a single batch, so vram is the limiting factor
+
+### UI and workflow improvements
+
+- **Model loader** improvements:
- detect model components on model load fail
- Flux, SD35: force unload model
- Flux: apply `bnb` quant when loading *unet/transformer*
- Flux: all-in-one safetensors
example:
- Flux: do not recast quants
-- Sampler improvements
- - update DPM FlowMatch samplers
-- UI:
+- **UI**:
- improved stats on generate completion
- improved live preview display and performance
- improved accordion behavior
@@ -37,16 +46,20 @@
- control: option to hide input column
- control: add stats
- browser->server logging framework
-- Fixes:
- - update `diffusers`
- - fix README links
- - fix sdxl controlnet single-file loader
- - relax settings validator
- - improve js progress calls resiliency
- - fix text-to-video pipeline
- - avoid live-preview if vae-decode is running
- - allow xyz-grid with multi-axis s&r
- - fix xyz-grid with lora
+- **Sampler** improvements
+ - update DPM FlowMatch samplers
+
+### Fixes:
+
+- update `diffusers`
+- fix README links
+- fix sdxl controlnet single-file loader
+- relax settings validator
+- improve js progress calls resiliency
+- fix text-to-video pipeline
+- avoid live-preview if vae-decode is running
+- allow xyz-grid with multi-axis s&r
+- fix xyz-grid with lora
## Update for 2024-11-21
diff --git a/modules/processing_class.py b/modules/processing_class.py
index 79f51576f..21e86c1b0 100644
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@@ -31,8 +31,8 @@ def __init__(self,
n_iter: int = 1,
steps: int = 50,
clip_skip: int = 1,
- width: int = 512,
- height: int = 512,
+ width: int = 1024,
+ height: int = 1024,
# samplers
sampler_index: int = None, # pylint: disable=unused-argument # used only to set sampler_name
sampler_name: str = None,
diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py
index ec7fbf048..22acf296c 100644
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@@ -561,7 +561,9 @@ def save_intermediate(p, latents, suffix):
def update_sampler(p, sd_model, second_pass=False):
sampler_selection = p.hr_sampler_name if second_pass else p.sampler_name
if hasattr(sd_model, 'scheduler'):
- if sampler_selection is None or sampler_selection == 'None':
+ if sampler_selection == 'None':
+ return
+ if sampler_selection is None:
sampler = sd_samplers.all_samplers_map.get("UniPC")
else:
sampler = sd_samplers.all_samplers_map.get(sampler_selection, None)
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index e560744dd..d8416e5d9 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -47,6 +47,8 @@ def visible_sampler_names():
def create_sampler(name, model):
+ if name is None or name == 'None':
+ return model.scheduler
try:
current = model.scheduler.__class__.__name__
except Exception:
diff --git a/modules/style_aligned/inversion.py b/modules/style_aligned/inversion.py
new file mode 100644
index 000000000..8c91cc02a
--- /dev/null
+++ b/modules/style_aligned/inversion.py
@@ -0,0 +1,124 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import annotations
+from typing import Callable, TYPE_CHECKING
+from diffusers import StableDiffusionXLPipeline
+import torch
+from tqdm import tqdm
+if TYPE_CHECKING:
+ import numpy as np
+
+
+T = torch.Tensor
+TN = T
+InversionCallback = Callable[[StableDiffusionXLPipeline, int, T, dict[str, T]], dict[str, T]]
+
+
+def _get_text_embeddings(prompt: str, tokenizer, text_encoder, device):
+ # Tokenize text and get embeddings
+ text_inputs = tokenizer(prompt, padding='max_length', max_length=tokenizer.model_max_length, truncation=True, return_tensors='pt')
+ text_input_ids = text_inputs.input_ids
+
+ with torch.no_grad():
+ prompt_embeds = text_encoder(
+ text_input_ids.to(device),
+ output_hidden_states=True,
+ )
+
+ pooled_prompt_embeds = prompt_embeds[0]
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+ if prompt == '':
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ return negative_prompt_embeds, negative_pooled_prompt_embeds
+ return prompt_embeds, pooled_prompt_embeds
+
+
+def _encode_text_sdxl(model: StableDiffusionXLPipeline, prompt: str) -> tuple[dict[str, T], T]:
+ device = model._execution_device # pylint: disable=protected-access
+ prompt_embeds, pooled_prompt_embeds, = _get_text_embeddings(prompt, model.tokenizer, model.text_encoder, device) # pylint: disable=unused-variable
+ prompt_embeds_2, pooled_prompt_embeds2, = _get_text_embeddings( prompt, model.tokenizer_2, model.text_encoder_2, device)
+ prompt_embeds = torch.cat((prompt_embeds, prompt_embeds_2), dim=-1)
+ text_encoder_projection_dim = model.text_encoder_2.config.projection_dim
+ add_time_ids = model._get_add_time_ids((1024, 1024), (0, 0), (1024, 1024), model.text_encoder.dtype, # pylint: disable=protected-access
+ text_encoder_projection_dim).to(device)
+ added_cond_kwargs = {"text_embeds": pooled_prompt_embeds2, "time_ids": add_time_ids}
+ return added_cond_kwargs, prompt_embeds
+
+
+def _encode_text_sdxl_with_negative(model: StableDiffusionXLPipeline, prompt: str) -> tuple[dict[str, T], T]:
+ added_cond_kwargs, prompt_embeds = _encode_text_sdxl(model, prompt)
+ added_cond_kwargs_uncond, prompt_embeds_uncond = _encode_text_sdxl(model, "")
+ prompt_embeds = torch.cat((prompt_embeds_uncond, prompt_embeds, ))
+ added_cond_kwargs = {"text_embeds": torch.cat((added_cond_kwargs_uncond["text_embeds"], added_cond_kwargs["text_embeds"])),
+ "time_ids": torch.cat((added_cond_kwargs_uncond["time_ids"], added_cond_kwargs["time_ids"])),}
+ return added_cond_kwargs, prompt_embeds
+
+
+def _encode_image(model: StableDiffusionXLPipeline, image: np.ndarray) -> T:
+ image = torch.from_numpy(image).float() / 255.
+ image = (image * 2 - 1).permute(2, 0, 1).unsqueeze(0)
+ latent = model.vae.encode(image.to(model.vae.device, model.vae.dtype))['latent_dist'].mean * model.vae.config.scaling_factor
+ return latent
+
+
+def _next_step(model: StableDiffusionXLPipeline, model_output: T, timestep: int, sample: T) -> T:
+ timestep, next_timestep = min(timestep - model.scheduler.config.num_train_timesteps // model.scheduler.num_inference_steps, 999), timestep
+ alpha_prod_t = model.scheduler.alphas_cumprod[int(timestep)] if timestep >= 0 else model.scheduler.final_alpha_cumprod
+ alpha_prod_t_next = model.scheduler.alphas_cumprod[int(next_timestep)]
+ beta_prod_t = 1 - alpha_prod_t
+ next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
+ next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output
+ next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction
+ return next_sample
+
+
+def _get_noise_pred(model: StableDiffusionXLPipeline, latent: T, t: T, context: T, guidance_scale: float, added_cond_kwargs: dict[str, T]):
+ latents_input = torch.cat([latent] * 2)
+ noise_pred = model.unet(latents_input, t, encoder_hidden_states=context, added_cond_kwargs=added_cond_kwargs)["sample"]
+ noise_pred_uncond, noise_prediction_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_prediction_text - noise_pred_uncond)
+ # latents = next_step(model, noise_pred, t, latent)
+ return noise_pred
+
+
+def _ddim_loop(model: StableDiffusionXLPipeline, z0, prompt, guidance_scale) -> T:
+ all_latent = [z0]
+ added_cond_kwargs, text_embedding = _encode_text_sdxl_with_negative(model, prompt)
+ latent = z0.clone().detach().to(model.text_encoder.dtype)
+ for i in tqdm(range(model.scheduler.num_inference_steps)):
+ t = model.scheduler.timesteps[len(model.scheduler.timesteps) - i - 1]
+ noise_pred = _get_noise_pred(model, latent, t, text_embedding, guidance_scale, added_cond_kwargs)
+ latent = _next_step(model, noise_pred, t, latent)
+ all_latent.append(latent)
+ return torch.cat(all_latent).flip(0)
+
+
+def make_inversion_callback(zts, offset: int = 0):
+
+ def callback_on_step_end(pipeline: StableDiffusionXLPipeline, i: int, t: T, callback_kwargs: dict[str, T]) -> dict[str, T]: # pylint: disable=unused-argument
+ latents = callback_kwargs['latents']
+ latents[0] = zts[max(offset + 1, i + 1)].to(latents.device, latents.dtype)
+ return {'latents': latents}
+ return zts[offset], callback_on_step_end
+
+
+@torch.no_grad()
+def ddim_inversion(model: StableDiffusionXLPipeline, x0: np.ndarray, prompt: str, num_inference_steps: int, guidance_scale,) -> T:
+ z0 = _encode_image(model, x0)
+ model.scheduler.set_timesteps(num_inference_steps, device=z0.device)
+ zs = _ddim_loop(model, z0, prompt, guidance_scale)
+ return zs
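Note: a hypothetical usage sketch of the inversion helpers above; the `pipe` and `x0` variables are assumptions for illustration, not part of the patch. Invert a reference image into a latent trajectory, then replay it for the first batch item through the per-step callback:

```python
# Hypothetical usage of ddim_inversion / make_inversion_callback (illustrative only).
from modules.style_aligned import inversion

# assumed inputs: `pipe` is a loaded StableDiffusionXLPipeline, `x0` an HxWx3 uint8 numpy image
zts = inversion.ddim_inversion(pipe, x0, 'reference style description', num_inference_steps=50, guidance_scale=2.0)
zT, callback = inversion.make_inversion_callback(zts, offset=0)
# seed the first latent from zT and pass callback_on_step_end=callback to the pipeline call,
# so image 0 replays the inverted trajectory while the remaining prompts align to its style
```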
diff --git a/modules/style_aligned/sa_handler.py b/modules/style_aligned/sa_handler.py
new file mode 100644
index 000000000..ee4b1ca79
--- /dev/null
+++ b/modules/style_aligned/sa_handler.py
@@ -0,0 +1,281 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from diffusers import StableDiffusionXLPipeline
+from dataclasses import dataclass
+import torch
+import torch.nn as nn
+from torch.nn import functional as nnf
+from diffusers.models import attention_processor # pylint: disable=ungrouped-imports
+import einops
+
+T = torch.Tensor
+
+
+@dataclass(frozen=True)
+class StyleAlignedArgs:
+ share_group_norm: bool = True
+ share_layer_norm: bool = True
+ share_attention: bool = True
+ adain_queries: bool = True
+ adain_keys: bool = True
+ adain_values: bool = False
+ full_attention_share: bool = False
+ shared_score_scale: float = 1.
+ shared_score_shift: float = 0.
+ only_self_level: float = 0.
+
+
+def expand_first(feat: T, scale=1.,) -> T:
+ b = feat.shape[0]
+ feat_style = torch.stack((feat[0], feat[b // 2])).unsqueeze(1)
+ if scale == 1:
+ feat_style = feat_style.expand(2, b // 2, *feat.shape[1:])
+ else:
+ feat_style = feat_style.repeat(1, b // 2, 1, 1, 1)
+ feat_style = torch.cat([feat_style[:, :1], scale * feat_style[:, 1:]], dim=1)
+ return feat_style.reshape(*feat.shape)
+
+
+def concat_first(feat: T, dim=2, scale=1.) -> T:
+ feat_style = expand_first(feat, scale=scale)
+ return torch.cat((feat, feat_style), dim=dim)
+
+
+def calc_mean_std(feat, eps: float = 1e-5) -> tuple[T, T]:
+ feat_std = (feat.var(dim=-2, keepdims=True) + eps).sqrt()
+ feat_mean = feat.mean(dim=-2, keepdims=True)
+ return feat_mean, feat_std
+
+
+def adain(feat: T) -> T:
+ feat_mean, feat_std = calc_mean_std(feat)
+ feat_style_mean = expand_first(feat_mean)
+ feat_style_std = expand_first(feat_std)
+ feat = (feat - feat_mean) / feat_std
+ feat = feat * feat_style_std + feat_style_mean
+ return feat
+
+
+class DefaultAttentionProcessor(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ self.processor = attention_processor.AttnProcessor2_0()
+
+ def __call__(self, attn: attention_processor.Attention, hidden_states, encoder_hidden_states=None,
+ attention_mask=None, **kwargs):
+ return self.processor(attn, hidden_states, encoder_hidden_states, attention_mask)
+
+
+class SharedAttentionProcessor(DefaultAttentionProcessor):
+
+ def shifted_scaled_dot_product_attention(self, attn: attention_processor.Attention, query: T, key: T, value: T) -> T:
+ logits = torch.einsum('bhqd,bhkd->bhqk', query, key) * attn.scale
+ logits[:, :, :, query.shape[2]:] += self.shared_score_shift
+ probs = logits.softmax(-1)
+ return torch.einsum('bhqk,bhkd->bhqd', probs, value)
+
+ def shared_call( # pylint: disable=unused-argument
+ self,
+ attn: attention_processor.Attention,
+ hidden_states,
+ encoder_hidden_states=None,
+ attention_mask=None,
+ **kwargs
+ ):
+
+ residual = hidden_states
+ input_ndim = hidden_states.ndim
+ if input_ndim == 4:
+ batch_size, channel, height, width = hidden_states.shape
+ hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
+ batch_size, sequence_length, _ = (
+ hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
+ )
+
+ if attention_mask is not None:
+ attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
+ # scaled_dot_product_attention expects attention_mask shape to be
+ # (batch, heads, source_length, target_length)
+ attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
+
+ if attn.group_norm is not None:
+ hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
+
+ query = attn.to_q(hidden_states)
+ key = attn.to_k(hidden_states)
+ value = attn.to_v(hidden_states)
+ inner_dim = key.shape[-1]
+ head_dim = inner_dim // attn.heads
+
+ query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+ key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+ value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+ # if self.step >= self.start_inject:
+ if self.adain_queries:
+ query = adain(query)
+ if self.adain_keys:
+ key = adain(key)
+ if self.adain_values:
+ value = adain(value)
+ if self.share_attention:
+ key = concat_first(key, -2, scale=self.shared_score_scale)
+ value = concat_first(value, -2)
+ if self.shared_score_shift != 0:
+ hidden_states = self.shifted_scaled_dot_product_attention(attn, query, key, value,)
+ else:
+ hidden_states = nnf.scaled_dot_product_attention(
+ query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
+ )
+ else:
+ hidden_states = nnf.scaled_dot_product_attention(
+ query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
+ )
+ # hidden_states = adain(hidden_states)
+ hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
+ hidden_states = hidden_states.to(query.dtype)
+
+ # linear proj
+ hidden_states = attn.to_out[0](hidden_states)
+ # dropout
+ hidden_states = attn.to_out[1](hidden_states)
+
+ if input_ndim == 4:
+ hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
+
+ if attn.residual_connection:
+ hidden_states = hidden_states + residual
+
+ hidden_states = hidden_states / attn.rescale_output_factor
+ return hidden_states
+
+ def __call__(self, attn: attention_processor.Attention, hidden_states, encoder_hidden_states=None,
+ attention_mask=None, **kwargs):
+ if self.full_attention_share:
+ _b, n, _d = hidden_states.shape
+ hidden_states = einops.rearrange(hidden_states, '(k b) n d -> k (b n) d', k=2)
+ hidden_states = super().__call__(attn, hidden_states, encoder_hidden_states=encoder_hidden_states,
+ attention_mask=attention_mask, **kwargs)
+ hidden_states = einops.rearrange(hidden_states, 'k (b n) d -> (k b) n d', n=n)
+ else:
+ hidden_states = self.shared_call(attn, hidden_states, hidden_states, attention_mask, **kwargs)
+
+ return hidden_states
+
+ def __init__(self, style_aligned_args: StyleAlignedArgs):
+ super().__init__()
+ self.share_attention = style_aligned_args.share_attention
+ self.adain_queries = style_aligned_args.adain_queries
+ self.adain_keys = style_aligned_args.adain_keys
+ self.adain_values = style_aligned_args.adain_values
+ self.full_attention_share = style_aligned_args.full_attention_share
+ self.shared_score_scale = style_aligned_args.shared_score_scale
+ self.shared_score_shift = style_aligned_args.shared_score_shift
+
+
+def _get_switch_vec(total_num_layers, level):
+ if level <= 0:
+ return torch.zeros(total_num_layers, dtype=torch.bool)
+ if level >= 1:
+ return torch.ones(total_num_layers, dtype=torch.bool)
+ to_flip = level > .5
+ if to_flip:
+ level = 1 - level
+ num_switch = int(level * total_num_layers)
+ vec = torch.arange(total_num_layers)
+ vec = vec % (total_num_layers // num_switch)
+ vec = vec == 0
+ if to_flip:
+ vec = ~vec
+ return vec
+
+
+def init_attention_processors(pipeline: StableDiffusionXLPipeline, style_aligned_args: StyleAlignedArgs | None = None):
+ attn_procs = {}
+ unet = pipeline.unet
+ number_of_self, number_of_cross = 0, 0
+ num_self_layers = len([name for name in unet.attn_processors.keys() if 'attn1' in name])
+ if style_aligned_args is None:
+ only_self_vec = _get_switch_vec(num_self_layers, 1)
+ else:
+ only_self_vec = _get_switch_vec(num_self_layers, style_aligned_args.only_self_level)
+ for i, name in enumerate(unet.attn_processors.keys()):
+ is_self_attention = 'attn1' in name
+ if is_self_attention:
+ number_of_self += 1
+ if style_aligned_args is None or only_self_vec[i // 2]:
+ attn_procs[name] = DefaultAttentionProcessor()
+ else:
+ attn_procs[name] = SharedAttentionProcessor(style_aligned_args)
+ else:
+ number_of_cross += 1
+ attn_procs[name] = DefaultAttentionProcessor()
+
+ unet.set_attn_processor(attn_procs)
+
+
+def register_shared_norm(pipeline: StableDiffusionXLPipeline,
+ share_group_norm: bool = True,
+ share_layer_norm: bool = True,
+ ):
+ def register_norm_forward(norm_layer: nn.GroupNorm | nn.LayerNorm) -> nn.GroupNorm | nn.LayerNorm:
+ if not hasattr(norm_layer, 'orig_forward'):
+ setattr(norm_layer, 'orig_forward', norm_layer.forward) # noqa
+ orig_forward = norm_layer.orig_forward
+
+ def forward_(hidden_states: T) -> T:
+ n = hidden_states.shape[-2]
+ hidden_states = concat_first(hidden_states, dim=-2)
+ hidden_states = orig_forward(hidden_states)
+ return hidden_states[..., :n, :]
+
+ norm_layer.forward = forward_
+ return norm_layer
+
+ def get_norm_layers(pipeline_, norm_layers_: dict[str, list[nn.GroupNorm | nn.LayerNorm]]):
+ if isinstance(pipeline_, nn.LayerNorm) and share_layer_norm:
+ norm_layers_['layer'].append(pipeline_)
+ if isinstance(pipeline_, nn.GroupNorm) and share_group_norm:
+ norm_layers_['group'].append(pipeline_)
+ else:
+ for layer in pipeline_.children():
+ get_norm_layers(layer, norm_layers_)
+
+ norm_layers = {'group': [], 'layer': []}
+ get_norm_layers(pipeline.unet, norm_layers)
+ return [register_norm_forward(layer) for layer in norm_layers['group']] + [register_norm_forward(layer) for layer in
+ norm_layers['layer']]
+
+
+class Handler:
+
+ def register(self, style_aligned_args: StyleAlignedArgs):
+ self.norm_layers = register_shared_norm(self.pipeline, style_aligned_args.share_group_norm,
+ style_aligned_args.share_layer_norm)
+ init_attention_processors(self.pipeline, style_aligned_args)
+
+ def remove(self):
+ for layer in self.norm_layers:
+ layer.forward = layer.orig_forward
+ self.norm_layers = []
+ init_attention_processors(self.pipeline, None)
+
+ def __init__(self, pipeline: StableDiffusionXLPipeline):
+ self.pipeline = pipeline
+ self.norm_layers = []
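Note: a brief usage sketch of the `Handler` class above; the `pipe` variable is an assumption here, and `scripts/style_aligned.py` below wires this up for real:

```python
# Illustrative only: register shared-attention processors before generation, remove them after.
from modules.style_aligned import sa_handler

handler = sa_handler.Handler(pipe)  # pipe: a loaded StableDiffusionXLPipeline (assumed)
sa_args = sa_handler.StyleAlignedArgs(share_attention=True, adain_queries=True, adain_keys=True)
handler.register(sa_args)           # patches norm layers and attention processors
# ... run a batched generation; the first batch item acts as the style reference ...
handler.remove()                    # restores the original processors and norm forwards
```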
diff --git a/scripts/style_aligned.py b/scripts/style_aligned.py
new file mode 100644
index 000000000..25feb49bc
--- /dev/null
+++ b/scripts/style_aligned.py
@@ -0,0 +1,117 @@
+import gradio as gr
+import torch
+import numpy as np
+import diffusers
+from modules import scripts, processing, shared, devices
+
+
+handler = None
+zts = None
+supported_model_list = ['sdxl']
+orig_prompt_attention = None
+
+
+class Script(scripts.Script):
+ def title(self):
+ return 'Style Aligned Image Generation'
+
+ def show(self, is_img2img):
+ return shared.native
+
+ def reset(self):
+ global handler, zts # pylint: disable=global-statement
+ handler = None
+ zts = None
+ shared.log.info('SA: image upload')
+
+ def preset(self, preset):
+ if preset == 'text':
+ return [['attention', 'adain_queries', 'adain_keys'], 1.0, 0, 0.0]
+ if preset == 'image':
+ return [['group_norm', 'layer_norm', 'attention', 'adain_queries', 'adain_keys'], 1.0, 2, 0.0]
+ if preset == 'all':
+ return [['group_norm', 'layer_norm', 'attention', 'adain_queries', 'adain_keys', 'adain_values', 'full_attention_share'], 1.0, 1, 0.5]
+
+ def ui(self, _is_img2img): # ui elements
+ with gr.Row():
+            gr.HTML('  Style Aligned Image Generation')
+ with gr.Row():
+ preset = gr.Dropdown(label="Preset", choices=['text', 'image', 'all'], value='text')
+ scheduler = gr.Checkbox(label="Override scheduler", value=False)
+ with gr.Row():
+ shared_opts = gr.Dropdown(label="Shared options",
+ multiselect=True,
+ choices=['group_norm', 'layer_norm', 'attention', 'adain_queries', 'adain_keys', 'adain_values', 'full_attention_share'],
+ value=['attention', 'adain_queries', 'adain_keys'],
+ )
+ with gr.Row():
+ shared_score_scale = gr.Slider(label="Scale", minimum=0.0, maximum=2.0, step=0.01, value=1.0)
+ shared_score_shift = gr.Slider(label="Shift", minimum=0, maximum=10, step=1, value=0)
+ only_self_level = gr.Slider(label="Level", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
+ with gr.Row():
+ prompt = gr.Textbox(lines=1, label='Optional image description', placeholder='use the style from the image')
+ with gr.Row():
+ image = gr.Image(label='Optional image', source='upload', type='pil')
+
+ image.change(self.reset)
+ preset.change(self.preset, inputs=[preset], outputs=[shared_opts, shared_score_scale, shared_score_shift, only_self_level])
+
+ return [image, prompt, scheduler, shared_opts, shared_score_scale, shared_score_shift, only_self_level]
+
+ def run(self, p: processing.StableDiffusionProcessing, image, prompt, scheduler, shared_opts, shared_score_scale, shared_score_shift, only_self_level): # pylint: disable=arguments-differ
+ global handler, zts, orig_prompt_attention # pylint: disable=global-statement
+ if shared.sd_model_type not in supported_model_list:
+ shared.log.warning(f'SA: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
+ return None
+
+ from modules.style_aligned import sa_handler, inversion
+
+ handler = sa_handler.Handler(shared.sd_model)
+ sa_args = sa_handler.StyleAlignedArgs(
+ share_group_norm='group_norm' in shared_opts,
+ share_layer_norm='layer_norm' in shared_opts,
+ share_attention='attention' in shared_opts,
+ adain_queries='adain_queries' in shared_opts,
+ adain_keys='adain_keys' in shared_opts,
+ adain_values='adain_values' in shared_opts,
+ full_attention_share='full_attention_share' in shared_opts,
+ shared_score_scale=float(shared_score_scale),
+ shared_score_shift=np.log(shared_score_shift) if shared_score_shift > 0 else 0,
+            only_self_level=float(only_self_level),
+ )
+ handler.register(sa_args)
+
+ if scheduler:
+ shared.sd_model.scheduler = diffusers.DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+ p.sampler_name = 'None'
+
+ if image is not None and zts is None:
+ shared.log.info(f'SA: inversion image={image} prompt="{prompt}"')
+ image = image.resize((1024, 1024))
+ x0 = np.array(image).astype(np.float32) / 255.0
+ shared.sd_model.scheduler = diffusers.DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+ zts = inversion.ddim_inversion(shared.sd_model, x0, prompt, num_inference_steps=50, guidance_scale=2)
+
+ p.prompt = p.prompt.splitlines()
+ p.batch_size = len(p.prompt)
+ orig_prompt_attention = shared.opts.prompt_attention
+ shared.opts.data['prompt_attention'] = 'fixed' # otherwise need to deal with class_tokens_mask
+
+ if zts is not None:
+ processing.fix_seed(p)
+ zT, inversion_callback = inversion.make_inversion_callback(zts, offset=0)
+ generator = torch.Generator(device='cpu')
+ generator.manual_seed(p.seed)
+ latents = torch.randn(p.batch_size, 4, 128, 128, device='cpu', generator=generator, dtype=devices.dtype,).to(devices.device)
+ latents[0] = zT
+ p.task_args['latents'] = latents
+ p.task_args['callback_on_step_end'] = inversion_callback
+
+ shared.log.info(f'SA: batch={p.batch_size} type={"image" if zts is not None else "text"} config={sa_args.__dict__}')
+
+ def after(self, p: processing.StableDiffusionProcessing, *args): # pylint: disable=unused-argument
+ global handler # pylint: disable=global-statement
+ if handler is not None:
+ handler.remove()
+ handler = None
+ shared.opts.data['prompt_attention'] = orig_prompt_attention
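The script can also be driven through the regular txt2img API by passing positional script_args in the same order as Script.run() above; a minimal sketch (the script_name value, server URL and timeout are assumptions, the argument values mirror the 'text' preset):

    import requests

    payload = {
        'prompt': 'a toy train\na toy airplane\na toy boat',  # one prompt per line -> one batched, style-aligned run
        'steps': 20,
        'script_name': 'style aligned image generation',  # assumed to match the script title
        'script_args': [
            None,   # image: optional b64 reference image for ddim inversion
            '',     # prompt: optional description of the reference image
            False,  # scheduler: override sampler with DDIM
            ['attention', 'adain_queries', 'adain_keys'],  # shared_opts ('text' preset)
            1.0,    # shared_score_scale
            0,      # shared_score_shift
            0.0,    # only_self_level
        ],
    }
    res = requests.post('http://127.0.0.1:7860/sdapi/v1/txt2img', json=payload, timeout=300)
    images = res.json().get('images', [])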
From 00ebdf3476526b136594e98ed9540fb24a4f5fd9 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 27 Nov 2024 10:07:37 -0500
Subject: [PATCH 022/162] fix api scripts callbacks
Signed-off-by: Vladimir Mandic
---
.eslintrc.json | 3 ++-
CHANGELOG.md | 5 +++--
cli/api-pulid.js | 24 +++++++++++++++++-------
modules/api/control.py | 2 ++
modules/api/generate.py | 4 ++++
modules/control/run.py | 1 -
modules/shared.py | 4 ++--
scripts/pulid_ext.py | 6 ++++--
8 files changed, 34 insertions(+), 15 deletions(-)
diff --git a/.eslintrc.json b/.eslintrc.json
index 2dddb41a1..c86dbb749 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -37,7 +37,8 @@
"object-curly-newline":"off",
"prefer-rest-params":"off",
"prefer-destructuring":"off",
- "radix":"off"
+ "radix":"off",
+ "node/shebang": "off"
},
"globals": {
    // assets
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8183fbbc8..3ad139f0e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-26
+## Update for 2024-11-27
### New models and integrations
@@ -49,7 +49,7 @@
- **Sampler** improvements
- update DPM FlowMatch samplers
-### Fixes:
+### Fixes
- update `diffusers`
- fix README links
@@ -60,6 +60,7 @@
- avoid live-preview if vae-decode is running
- allow xyz-grid with multi-axis s&r
- fix xyz-grid with lora
+- fix api script callbacks
## Update for 2024-11-21
diff --git a/cli/api-pulid.js b/cli/api-pulid.js
index fde0ae43b..033824e9b 100755
--- a/cli/api-pulid.js
+++ b/cli/api-pulid.js
@@ -10,12 +10,13 @@ const argparse = require('argparse');
const sd_url = process.env.SDAPI_URL || 'http://127.0.0.1:7860';
const sd_username = process.env.SDAPI_USR;
const sd_password = process.env.SDAPI_PWD;
+let args = {};
function b64(file) {
const data = fs.readFileSync(file);
- const b64 = Buffer.from(data).toString('base64');
+ const b64str = Buffer.from(data).toString('base64');
const ext = path.extname(file).replace('.', '');
- str = `data:image/${ext};base64,${b64}`;
+ const str = `data:image/${ext};base64,${b64str}`;
// console.log('b64:', ext, b64.length);
return str;
}
@@ -39,7 +40,16 @@ function options() {
if (args.pulid) {
const b64image = b64(args.pulid);
opt.script_name = 'pulid';
- opt.script_args = [b64image, 0.9];
+ opt.script_args = [
+ b64image, // b64 encoded image, required param
+ 0.9, // strength, optional
+ 20, // zero, optional
+ 'dpmpp_sde', // sampler, optional
+ 'v2', // ortho, optional
+ true, // restore (disable pulid after run), optional
+ true, // offload, optional
+ 'v1.1', // version, optional
+ ];
}
// console.log('options:', opt);
return opt;
@@ -53,8 +63,8 @@ function init() {
parser.add_argument('--height', { type: 'int', help: 'height' });
parser.add_argument('--pulid', { type: 'str', help: 'pulid init image' });
parser.add_argument('--output', { type: 'str', help: 'output path' });
- const args = parser.parse_args();
- return args
+ const parsed = parser.parse_args();
+ return parsed;
}
async function main() {
@@ -73,12 +83,12 @@ async function main() {
console.log('result:', json.info);
for (const i in json.images) { // eslint-disable-line guard-for-in
const file = args.output || `/tmp/test-${i}.jpg`;
- const data = atob(json.images[i])
+ const data = atob(json.images[i]);
fs.writeFileSync(file, data, 'binary');
console.log('image saved:', file);
}
}
}
-const args = init();
+args = init();
main();
diff --git a/modules/api/control.py b/modules/api/control.py
index 29c5a77f1..345930341 100644
--- a/modules/api/control.py
+++ b/modules/api/control.py
@@ -159,6 +159,8 @@ def post_control(self, req: ReqControl):
output_images = []
output_processed = []
output_info = ''
+ # TODO control script process
+ # init script args, call scripts.script_control.run, call scripts.script_control.after
run.control_set({ 'do_not_save_grid': not req.save_images, 'do_not_save_samples': not req.save_images, **self.prepare_ip_adapter(req) })
run.control_set(getattr(req, "extra", {}))
res = run.control_run(**args)
diff --git a/modules/api/generate.py b/modules/api/generate.py
index b8ee645a4..9b409a14b 100644
--- a/modules/api/generate.py
+++ b/modules/api/generate.py
@@ -116,6 +116,8 @@ def post_text2img(self, txt2imgreq: models.ReqTxt2Img):
processed = scripts.scripts_txt2img.run(p, *script_args) # Need to pass args as list here
else:
processed = process_images(p)
+ processed = scripts.scripts_txt2img.after(p, processed, *script_args)
+ p.close()
shared.state.end(api=False)
if processed is None or processed.images is None or len(processed.images) == 0:
b64images = []
@@ -166,6 +168,8 @@ def post_img2img(self, img2imgreq: models.ReqImg2Img):
processed = scripts.scripts_img2img.run(p, *script_args) # Need to pass args as list here
else:
processed = process_images(p)
+ processed = scripts.scripts_img2img.after(p, processed, *script_args)
+ p.close()
shared.state.end(api=False)
if processed is None or processed.images is None or len(processed.images) == 0:
b64images = []
diff --git a/modules/control/run.py b/modules/control/run.py
index 88dddc213..2fe13dd73 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -87,7 +87,6 @@ def control_run(state: str = '',
u.process.override = u.override
global instance, pipe, original_pipeline # pylint: disable=global-statement
- t_start = time.time()
debug(f'Control: type={unit_type} input={inputs} init={inits} type={input_type}')
if inputs is None or (type(inputs) is list and len(inputs) == 0):
inputs = [None]
diff --git a/modules/shared.py b/modules/shared.py
index a89cbbc95..0c4d36746 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -559,8 +559,8 @@ def get_default_modes():
"diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible"),
"diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
- "diffusers_offload_max_gpu_memory": OptionInfo(round(gpu_memory * 0.75, 1), "Max GPU memory for balanced offload mode in GB", gr.Slider, {"minimum": 0, "maximum": gpu_memory, "step": 0.01,}),
- "diffusers_offload_max_cpu_memory": OptionInfo(round(cpu_memory * 0.75, 1), "Max CPU memory for balanced offload mode in GB", gr.Slider, {"minimum": 0, "maximum": cpu_memory, "step": 0.01,}),
+ "diffusers_offload_max_gpu_memory": OptionInfo(round(gpu_memory * 0.75, 1), "Max GPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": gpu_memory, "step": 0.01, "visible": True }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(round(cpu_memory * 0.75, 1), "Max CPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": cpu_memory, "step": 0.01, "visible": False }),
"diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
"diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
"diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling"),
diff --git a/scripts/pulid_ext.py b/scripts/pulid_ext.py
index 676fa79f3..ee08e348b 100644
--- a/scripts/pulid_ext.py
+++ b/scripts/pulid_ext.py
@@ -164,11 +164,13 @@ def run(
p.batch_size = 1
sdp = shared.opts.cross_attention_optimization == "Scaled-Dot-Product"
+ sampler_fn = getattr(self.pulid.sampling, f'sample_{sampler}', None)
strength = getattr(p, 'pulid_strength', strength)
zero = getattr(p, 'pulid_zero', zero)
ortho = getattr(p, 'pulid_ortho', ortho)
sampler = getattr(p, 'pulid_sampler', sampler)
- sampler_fn = getattr(self.pulid.sampling, f'sample_{sampler}', None)
+ restore = getattr(p, 'pulid_restore', restore)
+ p.pulid_restore = restore
if sampler_fn is None:
sampler_fn = self.pulid.sampling.sample_dpmpp_2m_sde
@@ -199,7 +201,7 @@ def run(
return None
shared.sd_model.sampler = sampler_fn
- shared.log.info(f'PuLID: class={shared.sd_model.__class__.__name__} version="{version}" sdp={sdp} strength={strength} zero={zero} ortho={ortho} sampler={sampler_fn} images={[i.shape for i in images]} offload={offload}')
+ shared.log.info(f'PuLID: class={shared.sd_model.__class__.__name__} version="{version}" sdp={sdp} strength={strength} zero={zero} ortho={ortho} sampler={sampler_fn} images={[i.shape for i in images]} offload={offload} restore={restore}')
self.pulid.attention.NUM_ZERO = zero
self.pulid.attention.ORTHO = ortho == 'v1'
self.pulid.attention.ORTHO_v2 = ortho == 'v2'
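Note that the getattr calls above allow callers to override the UI values per generation by setting attributes on the processing object before the script executes; a minimal sketch (attribute names are the ones read above; how p is obtained depends on the caller):

    p.pulid_strength = 0.8  # overrides the strength slider for this run
    p.pulid_zero = 20       # overrides the zero value applied to attention.NUM_ZERO
    p.pulid_restore = True  # disable pulid again once the run completes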
From a2dc3670a1a2bc27cce9fdfd375767c6ec9ace88 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 27 Nov 2024 15:36:22 -0500
Subject: [PATCH 023/162] model-loader allow absolute path
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
cli/api-model.js | 30 ++++++++++++++++++++++++++++++
cli/full-test.sh | 3 +++
launch.py | 8 +++++---
modules/sd_checkpoint.py | 5 +++++
wiki | 2 +-
6 files changed, 45 insertions(+), 4 deletions(-)
create mode 100755 cli/api-model.js
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ad139f0e..0167d5509 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@
- **Model loader** improvements:
- detect model components on model load fail
+ - allow passing absolute path to model loader
- Flux, SD35: force unload model
- Flux: apply `bnb` quant when loading *unet/transformer*
- Flux: all-in-one safetensors
diff --git a/cli/api-model.js b/cli/api-model.js
new file mode 100755
index 000000000..e2ce5344a
--- /dev/null
+++ b/cli/api-model.js
@@ -0,0 +1,30 @@
+#!/usr/bin/env node
+
+const sd_url = process.env.SDAPI_URL || 'http://127.0.0.1:7860';
+const sd_username = process.env.SDAPI_USR;
+const sd_password = process.env.SDAPI_PWD;
+const models = [
+ '/mnt/models/stable-diffusion/sd15/lyriel_v16.safetensors',
+ '/mnt/models/stable-diffusion/flux/flux-finesse_v2-f1h-fp8.safetensors',
+ '/mnt/models/stable-diffusion/sdxl/TempestV0.1-Artistic.safetensors',
+];
+
+async function options(data) {
+ const method = 'POST';
+ const headers = new Headers();
+ const body = JSON.stringify(data);
+ headers.set('Content-Type', 'application/json');
+  if (sd_username && sd_password) headers.set('Authorization', `Basic ${btoa(sd_username + ':' + sd_password)}`);
+ const res = await fetch(`${sd_url}/sdapi/v1/options`, { method, headers, body });
+ return res;
+}
+
+async function main() {
+ for (const model of models) {
+ console.log('model:', model);
+ const res = await options({ sd_model_checkpoint: model });
+ console.log('result:', res);
+ }
+}
+
+main();
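The same model-switch call from Python, for environments without node; a minimal sketch mirroring the endpoint and option name used above (the model path is just the first entry from the list):

    import requests

    def set_model(path: str) -> int:
        # POST the absolute .safetensors path as sd_model_checkpoint, same as cli/api-model.js
        res = requests.post('http://127.0.0.1:7860/sdapi/v1/options',
                            json={'sd_model_checkpoint': path}, timeout=300)
        return res.status_code

    set_model('/mnt/models/stable-diffusion/sd15/lyriel_v16.safetensors')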
diff --git a/cli/full-test.sh b/cli/full-test.sh
index e410528ad..912dc3a5b 100755
--- a/cli/full-test.sh
+++ b/cli/full-test.sh
@@ -1,5 +1,8 @@
#!/usr/bin/env bash
+node cli/api-txt2img.js
+node cli/api-pulid.js
+
source venv/bin/activate
echo image-exif
python cli/api-info.py --input html/logo-bg-0.jpg
diff --git a/launch.py b/launch.py
index f944a7e54..e00da58c7 100755
--- a/launch.py
+++ b/launch.py
@@ -55,9 +55,11 @@ def get_custom_args():
if 'PS1' in env:
del env['PS1']
installer.log.trace(f'Environment: {installer.print_dict(env)}')
- else:
- env = [f'{k}={v}' for k, v in os.environ.items() if k.startswith('SD_')]
- installer.log.debug(f'Env flags: {env}')
+ env = [f'{k}={v}' for k, v in os.environ.items() if k.startswith('SD_')]
+ installer.log.debug(f'Env flags: {env}')
+ ldd = os.environ.get('LD_PRELOAD', None)
+ if ldd is not None:
+ installer.log.debug(f'Linker flags: "{ldd}"')
@lru_cache()
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index 20654e28b..e035fc3db 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -188,6 +188,11 @@ def get_closet_checkpoint_match(s: str):
if found and len(found) == 1:
return found[0]
+ # absolute path
+ if s.endswith('.safetensors') and os.path.isfile(s):
+ checkpoint_info = CheckpointInfo(s)
+ return checkpoint_info
+
# reference search
"""
found = sorted([info for info in shared.reference_models.values() if os.path.basename(info['path']).lower().startswith(s.lower())], key=lambda x: len(x['path']))
diff --git a/wiki b/wiki
index ba7d78b55..441d2c4e1 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit ba7d78b55eb95afe8509bd0069b8ec345b259f21
+Subproject commit 441d2c4e19349f0b219948837922e6373347076e
From a2f5ef4ae769e0a7c59f5c7d94945a2e9b160eaf Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 27 Nov 2024 15:42:37 -0500
Subject: [PATCH 024/162] update wiki
Signed-off-by: Vladimir Mandic
---
wiki | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wiki b/wiki
index 441d2c4e1..f57cdb49d 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 441d2c4e19349f0b219948837922e6373347076e
+Subproject commit f57cdb49d8ca928024b43525897d1c1379eab4c4
From 164ce252dc873ca32d01222714f019c8f71c2e8d Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 28 Nov 2024 08:46:10 -0500
Subject: [PATCH 025/162] add sd35 controlnets
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 ++-
installer.py | 2 +-
modules/control/units/controlnet.py | 3 +++
modules/model_quant.py | 41 +++++++++++++++++++++++++++++
modules/model_sd3.py | 1 +
modules/sd_models.py | 2 +-
modules/sd_models_compile.py | 1 -
modules/shared.py | 3 ++-
8 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0167d5509..b3f2282c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-27
+## Update for 2024-11-28
### New models and integrations
@@ -21,6 +21,8 @@
*recommended*: guidance scale 30
- [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
*recommended*: guidance scale 10
+- [StabilityAI SD35 ControlNets](https://huggingface.co/stabilityai/stable-diffusion-3.5-controlnets)
+ - In addition to previously released `InstantX` and `Alimama`, we now have *official* ones from StabilityAI
- [Style Aligned Image Generation](https://style-aligned-gen.github.io/)
enable in scripts, compatible with sd-xl
enter multiple prompts in prompt field separated by new line
diff --git a/installer.py b/installer.py
index 396b53fab..37202552d 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '7ac6e286ee994270e737b70c904ea50049d53567'
+ sha = '069186fac510d6f6f88a5e435523b235c823a8a0'
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 20b99412a..3f68a4896 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -85,6 +85,9 @@
"XLabs-AI HED": 'XLabs-AI/flux-controlnet-hed-diffusers'
}
predefined_sd3 = {
+ "StabilityAI Canny": 'diffusers-internal-dev/sd35-controlnet-canny-8b',
+ "StabilityAI Depth": 'diffusers-internal-dev/sd35-controlnet-depth-8b',
+ "StabilityAI Blur": 'diffusers-internal-dev/sd35-controlnet-blur-8b',
"InstantX Canny": 'InstantX/SD3-Controlnet-Canny',
"InstantX Pose": 'InstantX/SD3-Controlnet-Pose',
"InstantX Depth": 'InstantX/SD3-Controlnet-Depth',
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 0e7bdd4b3..9482fe898 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -5,6 +5,7 @@
bnb = None
quanto = None
+ao = None
def create_bnb_config(kwargs = None, allow_bnb: bool = True):
@@ -12,6 +13,8 @@ def create_bnb_config(kwargs = None, allow_bnb: bool = True):
if len(shared.opts.bnb_quantization) > 0 and allow_bnb:
if 'Model' in shared.opts.bnb_quantization:
load_bnb()
+ if bnb is None:
+ return kwargs
bnb_config = diffusers.BitsAndBytesConfig(
load_in_8bit=shared.opts.bnb_quantization_type in ['fp8'],
load_in_4bit=shared.opts.bnb_quantization_type in ['nf4', 'fp4'],
@@ -28,6 +31,44 @@ def create_bnb_config(kwargs = None, allow_bnb: bool = True):
return kwargs
+def create_ao_config(kwargs = None, allow_ao: bool = True):
+ from modules import shared
+ if len(shared.opts.torchao_quantization) > 0 and shared.opts.torchao_quantization_mode == 'pre' and allow_ao:
+ if 'Model' in shared.opts.torchao_quantization:
+ load_torchao()
+ if ao is None:
+ return kwargs
+ ao_config = {}
+ # ao_config = diffusers.TorchAoConfig("int8wo") # TODO torchao
+            shared.log.debug(f'Quantization: module=all type=torchao dtype={shared.opts.torchao_quantization_type}')
+ if kwargs is None:
+ return ao_config
+ else:
+ kwargs['quantization_config'] = ao_config
+ return kwargs
+ return kwargs
+
+
+def load_torchao(msg='', silent=False):
+ global ao # pylint: disable=global-statement
+ if ao is not None:
+ return ao
+ install('torchao', quiet=True)
+ try:
+ import torchao
+ ao = torchao
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+        log.debug(f'Quantization: type=torchao version={ao.__version__} fn={fn}') # pylint: disable=protected-access
+ return ao
+ except Exception as e:
+ if len(msg) > 0:
+            log.error(f"{msg} failed to import torchao: {e}")
+ ao = None
+ if not silent:
+ raise
+ return None
+
+
def load_bnb(msg='', silent=False):
global bnb # pylint: disable=global-statement
if bnb is not None:
diff --git a/modules/model_sd3.py b/modules/model_sd3.py
index b9d579085..ba036760a 100644
--- a/modules/model_sd3.py
+++ b/modules/model_sd3.py
@@ -150,6 +150,7 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None):
shared.log.debug(f'Load model: type=SD3 kwargs={list(kwargs)} repo="{repo_id}"')
kwargs = model_quant.create_bnb_config(kwargs)
+ kwargs = model_quant.create_ao_config(kwargs)
pipe = loader(
repo_id,
torch_dtype=devices.dtype,
diff --git a/modules/sd_models.py b/modules/sd_models.py
index aab35af18..68446bdd3 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -279,7 +279,7 @@ def eval_model(model, op=None, sd_model=None): # pylint: disable=unused-argument
model.eval()
return model
sd_model = sd_models_compile.apply_compile_to_model(sd_model, eval_model, ["Model", "VAE", "Text Encoder"], op="eval")
- if len(shared.opts.torchao_quantization) > 0:
+ if len(shared.opts.torchao_quantization) > 0 and shared.opts.torchao_quantization_mode != 'post':
sd_model = sd_models_compile.torchao_quantization(sd_model)
if shared.opts.opt_channelslast and hasattr(sd_model, 'unet'):
diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py
index 91ed84ded..38d3ef57f 100644
--- a/modules/sd_models_compile.py
+++ b/modules/sd_models_compile.py
@@ -535,7 +535,6 @@ def torchao_quantization(sd_model):
if hasattr(sd_model, 'transformer') and 'Model' in shared.opts.torchao_quantization:
modules.append('transformer')
q.quantize_(sd_model.transformer, fn(), device=devices.device)
- # sd_model.transformer = q.autoquant(sd_model.transformer, error_on_unseen=False)
if hasattr(sd_model, 'vae') and 'VAE' in shared.opts.torchao_quantization:
modules.append('vae')
q.quantize_(sd_model.vae, fn(), device=devices.device)
diff --git a/modules/shared.py b/modules/shared.py
index 0c4d36746..5b54a0de2 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -477,7 +477,7 @@ def get_default_modes():
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_checkpoint_autodownload": OptionInfo(True, "Model auto-download on demand"),
"sd_textencoder_cache": OptionInfo(True, "Cache text encoder results", gr.Checkbox, {"visible": False}),
- "sd_textencoder_cache_size": OptionInfo(4, "Text encoder results LRU cache size", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
+ "sd_textencoder_cache_size": OptionInfo(4, "Text encoder cache size", gr.Slider, {"minimum": 0, "maximum": 16, "step": 1}),
"stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),
"prompt_mean_norm": OptionInfo(False, "Prompt attention normalization", gr.Checkbox),
"comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": not native }),
@@ -590,6 +590,7 @@ def get_default_modes():
"optimum_quanto_weights_type": OptionInfo("qint8", "Optimum.quanto quantization type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}),
"optimum_quanto_activations_type": OptionInfo("none", "Optimum.quanto quantization activations ", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}),
"torchao_quantization": OptionInfo([], "TorchAO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
+ "torchao_quantization_mode": OptionInfo("pre", "TorchAO quantization mode", gr.Radio, {"choices": ['pre', 'post'], "visible": native}),
"torchao_quantization_type": OptionInfo("int8", "TorchAO quantization type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}),
"nncf_compress_weights": OptionInfo([], "NNCF compression enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
"nncf_compress_weights_mode": OptionInfo("INT8", "NNCF compress mode", gr.Radio, {"choices": ['INT8', 'INT8_SYM', 'INT4_ASYM', 'INT4_SYM', 'NF4'] if cmd_opts.use_openvino else ['INT8']}),
From 2c417b3280bea67f45182d728e295eb7e9920ba5 Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Thu, 28 Nov 2024 15:14:24 +0100
Subject: [PATCH 026/162] Create black-teal-reimagined.css
Modern look of the black-teal theme in SD.Next
---
javascript/black-teal-reimagined.css | 1105 ++++++++++++++++++++++++++
1 file changed, 1105 insertions(+)
create mode 100644 javascript/black-teal-reimagined.css
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
new file mode 100644
index 000000000..e5618c02c
--- /dev/null
+++ b/javascript/black-teal-reimagined.css
@@ -0,0 +1,1105 @@
+/* Generic HTML Tags */
+@font-face {
+ font-family: 'NotoSans';
+ font-display: swap;
+ font-style: normal;
+ font-weight: 100;
+ src: local('NotoSans'), url('notosans-nerdfont-regular.ttf');
+}
+
+html {
+ scroll-behavior: smooth;
+}
+
+:root,
+.light,
+.dark {
+ --font: 'NotoSans';
+ --font-mono: 'ui-monospace', 'Consolas', monospace;
+ --font-size: 16px;
+
+ /* Primary Colors */
+ --primary-50: #7dffff;
+ --primary-100: #72e8e8;
+ --primary-200: #67d2d2;
+ --primary-300: #5dbcbc;
+ --primary-400: #52a7a7;
+ --primary-500: #489292;
+ --primary-600: #3e7d7d;
+ --primary-700: #356969;
+ --primary-800: #2b5656;
+ --primary-900: #224444;
+ --primary-950: #193232;
+
+ /* Neutral Colors */
+ --neutral-50: #f0f0f0;
+ --neutral-100: #e0e0e0;
+ --neutral-200: #d0d0d0;
+ --neutral-300: #b0b0b0;
+ --neutral-400: #909090;
+ --neutral-500: #707070;
+ --neutral-600: #606060;
+ --neutral-700: #404040;
+ --neutral-800: #303030;
+ --neutral-900: #202020;
+ --neutral-950: #101010;
+
+ /* Highlight and Inactive Colors */
+ --highlight-color: var(--primary-200);
+ --inactive-color: var(--primary-800);
+
+ /* Text Colors */
+ --body-text-color: var(--neutral-100);
+ --body-text-color-subdued: var(--neutral-300);
+
+ /* Background Colors */
+ --background-color: var(--neutral-950);
+ --background-fill-primary: var(--neutral-700);
+ --input-background-fill: var(--neutral-800);
+
+ /* Padding and Borders */
+ --input-padding: 4px;
+ --input-shadow: none;
+ --button-primary-text-color: var(--neutral-100);
+ --button-primary-background-fill: var(--primary-600);
+ --button-primary-background-fill-hover: var(--primary-800);
+ --button-secondary-text-color: var(--neutral-100);
+ --button-secondary-background-fill: var(--neutral-900);
+ --button-secondary-background-fill-hover: var(--neutral-600);
+
+ /* Border Radius */
+ --radius-xs: 2px;
+ --radius-sm: 4px;
+ --radius-md: 6px;
+ --radius-lg: 8px;
+ --radius-xl: 10px;
+ --radius-xxl: 15px;
+ --radius-xxxl: 20px;
+
+ /* Shadows */
+ --shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.1);
+ --shadow-md: 0 2px 4px rgba(0, 0, 0, 0.1);
+ --shadow-lg: 0 4px 8px rgba(0, 0, 0, 0.1);
+ --shadow-xl: 0 8px 16px rgba(0, 0, 0, 0.1);
+
+ /* Animation */
+ --transition: all 0.3s ease;
+
+ /* Scrollbar */
+ --scrollbar-bg: var(--neutral-800);
+ --scrollbar-thumb: var(--highlight-color);
+}
+
+html {
+ font-size: var(--font-size);
+ font-family: var(--font);
+}
+
+body,
+button,
+input,
+select,
+textarea {
+ font-family: var(--font);
+ color: var(--body-text-color);
+ transition: var(--transition);
+}
+
+button {
+ max-width: 400px;
+ white-space: nowrap;
+ padding: 8px 12px;
+ border: none;
+ border-radius: var(--radius-md);
+ background-color: var(--button-primary-background-fill);
+ color: var(--button-primary-text-color);
+ cursor: pointer;
+ box-shadow: var(--shadow-sm);
+ transition: transform 0.2s ease, background-color 0.3s ease;
+}
+
+button:hover {
+ background-color: var(--button-primary-background-fill-hover);
+ transform: scale(1.05);
+}
+
+/* Range Input Styles */
+.slider-container {
+ width: 100%;
+ /* Ensures the container takes full width */
+ max-width: 100%;
+ /* Prevents overflow */
+ padding: 0 10px;
+ /* Adds padding for aesthetic spacing */
+ box-sizing: border-box;
+ /* Ensures padding doesn't affect width */
+}
+
+input[type='range'] {
+ display: block;
+ margin: 0;
+ padding: 0;
+ height: 1em;
+ background-color: transparent;
+ overflow: hidden;
+ cursor: pointer;
+ box-shadow: none;
+ -webkit-appearance: none;
+ opacity: 0.7;
+ appearance: none;
+ width: 100%;
+ /* Makes the slider responsive */
+}
+
+input[type='range'] {
+ opacity: 1;
+}
+
+input[type='range']::-webkit-slider-thumb {
+ -webkit-appearance: none;
+ height: 1em;
+ width: 1em;
+ background-color: var(--highlight-color);
+ border-radius: var(--radius-xs);
+ box-shadow: var(--shadow-md);
+ cursor: pointer;
+ /* Ensures the thumb is clickable */
+}
+
+input[type='range']::-webkit-slider-runnable-track {
+ -webkit-appearance: none;
+ height: 6px;
+ background: var(--input-background-fill);
+ border-radius: var(--radius-md);
+}
+
+input[type='range']::-moz-range-thumb {
+ height: 1em;
+ width: 1em;
+ background-color: var(--highlight-color);
+ border-radius: var(--radius-xs);
+ box-shadow: var(--shadow-md);
+ cursor: pointer;
+ /* Ensures the thumb is clickable */
+}
+
+input[type='range']::-moz-range-track {
+ height: 6px;
+ background: var(--input-background-fill);
+ border-radius: var(--radius-md);
+}
+
+@media (max-width: 768px) {
+ .slider-container {
+ width: 100%;
+ /* Adjust width for smaller screens */
+ }
+
+ .networks-menu,
+ .styles-menu {
+ width: 100%;
+ /* Ensure menus are full width */
+ margin: 0;
+ /* Reset margins for smaller screens */
+ }
+}
+
+/* Scrollbar Styles */
+:root {
+ scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-bg);
+}
+
+::-webkit-scrollbar {
+ width: 12px;
+ height: 12px;
+}
+
+::-webkit-scrollbar-track {
+ background: var(--scrollbar-bg);
+}
+
+::-webkit-scrollbar-thumb {
+ background-color: var(--scrollbar-thumb);
+ border-radius: var(--radius-lg);
+ box-shadow: var(--shadow-sm);
+}
+
+/* Tab Navigation Styles */
+.tab-nav {
+ display: flex;
+ /* Use flexbox for layout */
+ justify-content: space-around;
+ /* Space out the tabs evenly */
+ align-items: center;
+ /* Center items vertically */
+ background: var(--background-color);
+ /* Background color */
+ border-bottom: 1px solid var(--highlight-color) !important;
+ /* Bottom border for separation */
+ box-shadow: var(--shadow-md);
+ /* Shadow for depth */
+}
+
+/* Individual Tab Styles */
+.tab-nav>button {
+ background: var(--neutral-900);
+ /* No background for default state */
+ color: var(--text-color);
+ /* Text color */
+ border: none;
+ /* No border */
+ border-radius: var(--radius-xxxl);
+ /* Rounded corners */
+ cursor: pointer;
+ /* Pointer cursor */
+ transition: background 0.3s ease, color 0.3s ease;
+ /* Smooth transition */
+}
+
+/* Active Tab Style */
+.tab-nav>button.active {
+ background: var(--highlight-color);
+ /* Highlight active tab */
+ color: var(--background-color);
+ /* Change text color for active tab */
+}
+
+/* Hover State for Tabs */
+.tab-nav>button:hover {
+ background: var(--highlight-color);
+ /* Background on hover */
+ color: var(--background-color);
+ /* Change text color on hover */
+}
+
+/* Responsive Styles */
+@media (max-width: 768px) {
+ .tab-nav {
+ flex-direction: column;
+ /* Stack tabs vertically on smaller screens */
+ align-items: stretch;
+ /* Stretch tabs to full width */
+ }
+
+ .tab-nav>button {
+ width: 100%;
+ /* Full width for buttons */
+ text-align: left;
+ /* Align text to the left */
+ }
+}
+
+/* Quick Settings Panel Styles */
+#quicksettings {
+ background: var(--background-color);
+ /* Background color */
+ box-shadow: var(--shadow-lg);
+ /* Shadow for depth */
+ border-radius: var(--radius-lg);
+ /* Rounded corners */
+ padding: 1em;
+ /* Padding for spacing */
+ z-index: 200;
+ /* Ensure it stays on top */
+}
+
+/* Quick Settings Header */
+#quicksettings .header {
+ font-size: var(--text-lg);
+ /* Font size for header */
+ font-weight: bold;
+ /* Bold text */
+ margin-bottom: 0.5em;
+ /* Space below header */
+}
+
+/* Quick Settings Options */
+#quicksettings .option {
+ display: flex;
+ /* Flexbox for layout */
+ justify-content: space-between;
+ /* Space between label and toggle */
+ align-items: center;
+ /* Center items vertically */
+ padding: 0.5em 0;
+ /* Padding for each option */
+ border-bottom: 1px solid var(--neutral-600);
+ /* Separator line */
+}
+
+/* Option Label Styles */
+#quicksettings .option label {
+ color: var(--text-color);
+ /* Text color */
+}
+
+/* Toggle Switch Styles */
+#quicksettings .option input[type="checkbox"] {
+ cursor: pointer;
+ /* Pointer cursor */
+}
+
+/* Quick Settings Footer */
+#quicksettings .footer {
+ margin-top: 1em;
+ /* Space above footer */
+ text-align: right;
+ /* Align text to the right */
+}
+
+/* Close Button Styles */
+#quicksettings .footer button {
+ background: var(--button-primary-background-fill);
+ /* Button background */
+ color: var(--button-primary-text-color);
+ /* Button text color */
+ border: none;
+ /* No border */
+ border-radius: var(--radius-md);
+ /* Rounded corners */
+ padding: 0.5em 1em;
+ /* Padding for button */
+ cursor: pointer;
+ /* Pointer cursor */
+ transition: 0.3s ease;
+ /* Smooth transition */
+}
+
+/* Close Button Hover State */
+#quicksettings .footer button:hover {
+ background: var(--highlight-color);
+ /* Change background on hover */
+}
+
+/* Responsive Styles */
+@media (max-width: 768px) {
+ #quicksettings {
+ right: 10px;
+ /* Adjust position for smaller screens */
+ width: 90%;
+ /* Full width on smaller screens */
+ }
+}
+
+/* Form Styles */
+div.form {
+ border-width: 0;
+ box-shadow: var(--shadow-md);
+ background: var(--background-fill-primary);
+ padding: 16px;
+ border-radius: var(--radius-md);
+}
+
+/* Gradio Style Classes */
+fieldset .gr-block.gr-box,
+label.block span {
+ padding: 0;
+ margin-top: -4px;
+}
+
+.border-2 {
+ border-width: 0;
+}
+
+.border-b-2 {
+ border-bottom-width: 2px;
+ border-color: var(--highlight-color) !important;
+ padding-bottom: 2px;
+ margin-bottom: 8px;
+}
+
+.bg-white {
+ color: lightyellow;
+ background-color: var(--inactive-color);
+}
+
+.gr-box {
+ border-radius: var(--radius-sm) !important;
+ background-color: var(--neutral-950) !important;
+ box-shadow: var(--shadow-md);
+ border-width: 0;
+ padding: 4px;
+ margin: 12px 0;
+}
+
+.gr-button {
+ font-weight: normal;
+ box-shadow: var(--shadow-sm);
+ font-size: 0.8rem;
+ min-width: 32px;
+ min-height: 32px;
+ padding: 3px;
+ margin: 3px;
+ transition: var(--transition);
+}
+
+.gr-button:hover {
+ background-color: var(--highlight-color);
+}
+
+.gr-check-radio {
+ background-color: var(--inactive-color);
+ border-width: 0;
+ border-radius: var(--radius-lg);
+ box-shadow: var(--shadow-sm);
+}
+
+.gr-check-radio:checked {
+ background-color: var(--highlight-color);
+}
+
+.gr-compact {
+ background-color: var(--background-color);
+}
+
+.gr-form {
+ border-width: 0;
+}
+
+.gr-input {
+ background-color: var(--neutral-800) !important;
+ padding: 4px;
+ margin: 4px;
+ border-radius: var(--radius-md);
+ transition: var(--transition);
+}
+
+.gr-input:hover {
+ background-color: var(--neutral-700);
+}
+
+.gr-input-label {
+ color: lightyellow;
+ border-width: 0;
+ background: transparent;
+ padding: 2px !important;
+}
+
+.gr-panel {
+ background-color: var(--background-color);
+ border-radius: var(--radius-md);
+ box-shadow: var(--shadow-md);
+}
+
+.eta-bar {
+ display: none !important;
+}
+
+.gradio-slider {
+ max-width: 200px;
+}
+
+.gradio-slider input[type="number"] {
+ background: var(--neutral-950);
+ margin-top: 2px;
+}
+
+.gradio-image {
+ height: unset !important;
+}
+
+svg.feather.feather-image,
+.feather .feather-image {
+ display: none;
+}
+
+.gap-2 {
+ padding-top: 8px;
+}
+
+.gr-box>div>div>input.gr-text-input {
+ right: 0;
+ width: 4em;
+ padding: 0;
+ top: -12px;
+ border: none;
+ max-height: 20px;
+}
+
+.output-html {
+  line-height: 1.2rem;
+ overflow-x: hidden;
+}
+
+.output-html>div {
+ margin-bottom: 8px;
+}
+
+.overflow-hidden .flex .flex-col .relative col .gap-4 {
+ min-width: var(--left-column);
+ max-width: var(--left-column);
+}
+
+.p-2 {
+ padding: 0;
+}
+
+.px-4 {
+ padding-left: 1rem;
+ padding-right: 1rem;
+}
+
+.py-6 {
+ padding-bottom: 0;
+}
+
+.tabs {
+ background-color: var(--background-color);
+}
+
+.block.token-counter span {
+ background-color: var(--input-background-fill) !important;
+ box-shadow: 2px 2px 2px #111;
+ border: none !important;
+ font-size: 0.7rem;
+}
+
+.label-wrap {
+ margin: 8px 0px 4px 0px;
+}
+
+.gradio-button.tool {
+ border: none;
+ background: none;
+ box-shadow: none;
+ filter: hue-rotate(340deg) saturate(0.5);
+}
+
+#tab_extensions table td,
+#tab_extensions table th,
+#tab_config table td,
+#tab_config table th {
+ border: none;
+}
+
+#tab_extensions table tr:hover,
+#tab_config table tr:hover {
+ background-color: var(--neutral-500) !important;
+}
+
+#tab_extensions table,
+#tab_config table {
+ width: 96vw;
+}
+
+#tab_extensions table thead,
+#tab_config table thead {
+ background-color: var(--neutral-700);
+}
+
+#tab_extensions table,
+#tab_config table {
+ background-color: var(--neutral-900);
+}
+
+/* Automatic Style Classes */
+.progressDiv {
+ border-radius: var(--radius-sm) !important;
+ position: fixed;
+ top: 44px;
+ right: 26px;
+ max-width: 262px;
+ height: 48px;
+ z-index: 99;
+ box-shadow: var(--button-shadow);
+}
+
+.progressDiv .progress {
+ border-radius: var(--radius-lg) !important;
+ background: var(--highlight-color);
+ line-height: 3rem;
+ height: 48px;
+}
+
+.gallery-item {
+ box-shadow: none !important;
+}
+
+.performance {
+ color: #888;
+}
+
+.extra-networks {
+ border-left: 2px solid var(--highlight-color) !important;
+ padding-left: 4px;
+}
+
+.image-buttons {
+ justify-content: center;
+ gap: 0 !important;
+}
+
+.image-buttons>button {
+ max-width: 160px;
+}
+
+.tooltip {
+ background: var(--primary-300);
+ color: black;
+ border: none;
+ border-radius: var(--radius-lg);
+}
+
+#system_row>button,
+#settings_row>button,
+#config_row>button {
+ max-width: 10em;
+}
+
+/* Gradio Elements Overrides */
+div.gradio-container {
+ overflow-x: hidden;
+}
+
+#img2img_label_copy_to_img2img {
+ font-weight: normal;
+}
+
+#txt2img_prompt,
+#txt2img_neg_prompt,
+#img2img_prompt,
+#img2img_neg_prompt,
+#control_prompt,
+#control_neg_prompt {
+ background-color: var(--background-color);
+ box-shadow: none !important;
+}
+
+#txt2img_prompt>label>textarea,
+#txt2img_neg_prompt>label>textarea,
+#img2img_prompt>label>textarea,
+#img2img_neg_prompt>label>textarea,
+#control_prompt>label>textarea,
+#control_neg_prompt>label>textarea {
+ font-size: 1.0em;
+ line-height: 1.4em;
+ border-radius: var(--radius-md);
+}
+
+#txt2img_styles,
+#img2img_styles,
+#control_styles {
+ padding: 0;
+ margin-top: 2px;
+}
+
+#txt2img_styles_refresh,
+#img2img_styles_refresh,
+#control_styles_refresh {
+ padding: 0;
+ margin-top: 1em;
+}
+
+#img2img_settings {
+ min-width: calc(2 * var(--left-column));
+ max-width: calc(2 * var(--left-column));
+ background-color: var(--neutral-950);
+ padding-top: 16px;
+}
+
+#interrogate,
+#deepbooru {
+ margin: 0 0px 10px 0px;
+ max-width: 80px;
+ max-height: 80px;
+ font-weight: normal;
+ font-size: 0.95em;
+}
+
+#quicksettings .gr-button-tool {
+ font-size: 1.6rem;
+ box-shadow: none;
+ margin-left: -20px;
+ margin-top: -2px;
+ height: 2.4em;
+}
+
+#footer,
+#style_pos_col,
+#style_neg_col,
+#roll_col,
+#extras_upscaler_2,
+#extras_upscaler_2_visibility,
+#txt2img_seed_resize_from_w,
+#txt2img_seed_resize_from_h {
+ display: none;
+}
+
+#save-animation {
+ border-radius: var(--radius-sm) !important;
+ margin-bottom: 16px;
+ background-color: var(--neutral-950);
+}
+
+#script_list {
+ padding: 4px;
+ margin-top: 16px;
+ margin-bottom: 8px;
+}
+
+#settings>div.flex-wrap {
+ width: 15em;
+}
+
+#settings_search {
+ margin-top: 1em;
+ margin-left: 1em;
+}
+
+#settings_search textarea {
+ padding: 0.5em;
+ height: 2.2em !important;
+}
+
+#txt2img_cfg_scale {
+ min-width: 200px;
+}
+
+#txt2img_checkboxes,
+#img2img_checkboxes,
+#control_checkboxes {
+ background-color: transparent;
+ margin-bottom: 0.2em;
+}
+
+textarea[rows="1"] {
+ height: 33px !important;
+ width: 99% !important;
+ padding: 8px !important;
+}
+
+#extras_upscale {
+ margin-top: 10px;
+}
+
+#txt2img_progress_row>div {
+ min-width: var(--left-column);
+ max-width: var(--left-column);
+}
+
+#txt2img_settings {
+ min-width: var(--left-column);
+ max-width: var(--left-column);
+ background-color: var(--neutral-950);
+ padding-top: 16px;
+}
+
+#pnginfo_html2_info {
+ margin-top: -18px;
+ background-color: var(--input-background-fill);
+ padding: var(--input-padding);
+}
+
+#txt2img_styles_row,
+#img2img_styles_row,
+#control_styles_row {
+ margin-top: -6px;
+}
+
+.block>span {
+ margin-bottom: 0 !important;
+ margin-top: var(--spacing-lg);
+}
+
+/* Extra Networks Container */
+#extra_networks_root {
+ width: 300px;
+ /* Set a fixed width for the sidebar */
+ position: absolute;
+ height: auto;
+ right: 0;
+ top: 13em;
+ z-index: 100;
+ background: var(--background-color);
+ box-shadow: var(--shadow-md);
+ border-radius: var(--radius-lg);
+ overflow: hidden;
+ /* Prevents overflow of content */
+}
+
+/* Extra Networks Styles */
+.extra-networks {
+ background: var(--background-color);
+ padding: var(--block-label-padding);
+ border-radius: var(--radius-lg);
+}
+
+/* Extra Networks Div Styles */
+.extra-networks>div {
+ margin: 0;
+ border-bottom: none !important;
+ gap: 0.3em 0;
+}
+
+.extra-networks .tab-nav>button:hover {
+ background: var(--highlight-color);
+}
+
+/* Network tab search and description important fix, dont remove */
+#txt2img_description,
+#txt2img_extra_search,
+#img2img_description,
+#img2img_extra_search,
+#control_description,
+#control_extra_search {
+ margin-top: 50px;
+}
+
+/* Individual Buttons */
+.extra-networks .buttons>button {
+ margin-left: -0.2em;
+ height: 1.4em;
+ color: var(--primary-300) !important;
+ font-size: 20px !important;
+ background: var(--button-primary-background-fill);
+ border: none;
+ border-radius: var(--radius-sm);
+ transition: var(--transition);
+}
+
+.extra-networks .buttons>button:hover {
+ background: var(--highlight-color);
+}
+
+/* Extra Networks Tab */
+.extra-networks-tab {
+ padding: 0 !important;
+}
+
+/* Subdirectories Styles */
+.extra-network-subdirs {
+ background: var(--input-background-fill);
+ overflow-x: hidden;
+ overflow-y: auto;
+ min-width: 120px;
+ padding-top: 0.5em;
+ margin-top: -4px !important;
+}
+
+/* Extra Networks Page */
+.extra-networks-page {
+ display: flex;
+}
+
+/* Network Cards Container */
+.extra-network-cards {
+ display: flex;
+ flex-wrap: wrap;
+ overflow-y: auto;
+ overflow-x: hidden;
+ align-content: flex-start;
+ width: 100%;
+ /* Ensures it takes full width */
+}
+
+/* Individual Card Styles */
+.extra-network-cards .card {
+ height: fit-content;
+ margin: 0 0 0.5em 0.5em;
+ position: relative;
+ scroll-snap-align: start;
+ scroll-margin-top: 0;
+ background: var(--neutral-800);
+ /* Background for cards */
+ border-radius: var(--radius-md);
+ box-shadow: var(--shadow-md);
+ transition: var(--transition);
+}
+
+/* Overlay Styles */
+.extra-network-cards .card .overlay {
+ z-index: 10;
+ width: 100%;
+ background: none;
+ border-radius: var(--radius-md);
+}
+
+/* Overlay Name Styles */
+.extra-network-cards .card .overlay .name {
+ font-size: var(--text-lg);
+ font-weight: bold;
+ text-shadow: 1px 1px black;
+ color: white;
+ overflow-wrap: anywhere;
+ position: absolute;
+ bottom: 0;
+ padding: 0.2em;
+ z-index: 10;
+}
+
+/* Preview Styles */
+.extra-network-cards .card .preview {
+ box-shadow: var(--button-shadow);
+ min-height: 30px;
+ border-radius: var(--radius-md);
+}
+
+/* Hover Effects */
+.extra-network-cards .card:hover .overlay {
+ background: rgba(0, 0, 0, 0.70);
+}
+
+.extra-network-cards .card:hover .preview {
+ box-shadow: none;
+ filter: grayscale(100%);
+}
+
+/* Tags Styles */
+.extra-network-cards .card .overlay .tags {
+ display: none;
+ overflow-wrap: anywhere;
+ position: absolute;
+ top: 100%;
+ z-index: 20;
+ background: var(--body-background-fill);
+ overflow-x: hidden;
+ overflow-y: auto;
+ max-height: 333px;
+}
+
+/* Individual Tag Styles */
+.extra-network-cards .card .overlay .tag {
+ padding: 2px;
+ margin: 2px;
+ background: rgba(70, 70, 70, 0.60);
+ font-size: var(--text-md);
+ cursor: pointer;
+ display: inline-block;
+}
+
+/* Actions Styles */
+.extra-network-cards .card .actions>span {
+ padding: 4px;
+ font-size: 34px !important;
+}
+
+.extra-network-cards .card .actions>span:hover {
+ color: var(--highlight-color);
+}
+
+/* Version Styles */
+.extra-network-cards .card .version {
+ position: absolute;
+ top: 0;
+ left: 0;
+ padding: 2px;
+ font-weight: bolder;
+ text-shadow: 1px 1px black;
+ text-transform: uppercase;
+ background: gray;
+ opacity: 75%;
+ margin: 4px;
+ line-height: 0.9rem;
+}
+
+/* Hover Actions */
+.extra-network-cards .card:hover .actions {
+ display: block;
+}
+
+.extra-network-cards .card:hover .overlay .tags {
+ display: block;
+}
+
+/* No Preview Card Styles */
+.extra-network-cards .card:has(>img[src*="card-no-preview.png"])::before {
+ content: '';
+ position: absolute;
+ width: 100%;
+ height: 100%;
+ mix-blend-mode: multiply;
+ background-color: var(--data-color);
+}
+
+/* Card List Styles */
+.extra-network-cards .card-list {
+ display: flex;
+ margin: 0.3em;
+ padding: 0.3em;
+ background: var(--input-background-fill);
+ cursor: pointer;
+ border-radius: var(--button-large-radius);
+}
+
+.extra-network-cards .card-list .tag {
+ color: var(--primary-500);
+ margin-left: 0.8em;
+}
+
+/* Correction color picker styling */
+#txt2img_hdr_color_picker label input {
+ width: 100%;
+ height: 100%;
+}
+
+/* Based on Gradio Built-in Dark Theme */
+:root,
+.light,
+.dark {
+ --body-background-fill: var(--background-color);
+ --color-accent-soft: var(--neutral-700);
+ --background-fill-secondary: none;
+ --border-color-accent: var(--background-color);
+ --border-color-primary: var(--background-color);
+ --link-text-color-active: var(--primary-500);
+ --link-text-color: var(--secondary-500);
+ --link-text-color-hover: var(--secondary-400);
+ --link-text-color-visited: var(--secondary-600);
+ --shadow-spread: 1px;
+ --block-background-fill: none;
+ --block-border-color: var(--border-color-primary);
+ --block_border_width: none;
+ --block-info-text-color: var(--body-text-color-subdued);
+ --block-label-background-fill: var(--background-fill-secondary);
+ --block-label-border-color: var(--border-color-primary);
+ --block_label_border_width: none;
+ --block-label-text-color: var(--neutral-200);
+ --block-shadow: none;
+ --block-title-background-fill: none;
+ --block-title-border-color: none;
+ --block-title-border-width: 0px;
+ --block-title-padding: 0;
+ --block-title-radius: none;
+ --block-title-text-size: var(--text-md);
+ --block-title-text-weight: 400;
+ --container-radius: var(--radius-lg);
+ --form-gap-width: 1px;
+ --layout-gap: var(--spacing-xxl);
+ --panel-border-width: 0;
+ --section-header-text-size: var(--text-md);
+ --section-header-text-weight: 400;
+ --checkbox-border-radius: var(--radius-sm);
+ --checkbox-label-gap: 2px;
+ --checkbox-label-padding: var(--spacing-md);
+ --checkbox-label-shadow: var(--shadow-drop);
+ --checkbox-label-text-size: var(--text-md);
+ --checkbox-label-text-weight: 400;
+ --checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e");
+ --radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
+ --checkbox-shadow: var(--input-shadow);
+ --error-border-width: 1px;
+ --input-border-width: 0;
+ --input-radius: var(--radius-lg);
+ --input-text-size: var(--text-md);
+ --input-text-weight: 400;
+ --loader-color: var(--color-accent);
+ --prose-text-size: var(--text-md);
+ --prose-text-weight: 400;
+ --prose-header-text-weight: 400;
+ --slider-color: var(--neutral-900);
+ --table-radius: var(--radius-lg);
+ --button-large-padding: 2px 6px;
+ --button-large-radius: var(--radius-lg);
+ --button-large-text-size: var(--text-lg);
+ --button-large-text-weight: 400;
+ --button-shadow: none;
+ --button-shadow-active: none;
+ --button-shadow-hover: none;
+ --button-small-padding: var(--spacing-sm) calc(2 * var(--spacing-sm));
+ --button-small-radius: var(--radius-lg);
+ --button-small-text-size: var(--text-md);
+ --button-small-text-weight: 400;
+ --button-transition: none;
+ --size-9: 64px;
+ --size-14: 64px;
+}
\ No newline at end of file
From 41a63008dcec0911c43f61e89a64424191a3a2d6 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Thu, 28 Nov 2024 19:18:41 +0300
Subject: [PATCH 027/162] Fix memmon
---
modules/memmon.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/memmon.py b/modules/memmon.py
index 6887e1e1c..d9fa3963d 100644
--- a/modules/memmon.py
+++ b/modules/memmon.py
@@ -42,14 +42,14 @@ def read(self):
if not self.disabled:
try:
self.data["free"], self.data["total"] = torch.cuda.mem_get_info(self.device.index if self.device.index is not None else torch.cuda.current_device())
+ self.data["used"] = self.data["total"] - self.data["free"]
torch_stats = torch.cuda.memory_stats(self.device)
- self.data["active"] = torch_stats["active.all.current"]
+ self.data["active"] = torch_stats.get("active.all.current", torch_stats["active_bytes.all.current"])
self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
- self.data['retries'] = torch_stats["num_alloc_retries"]
- self.data['oom'] = torch_stats["num_ooms"]
- self.data["used"] = self.data["total"] - self.data["free"]
+ self.data['retries'] = torch_stats.get("num_alloc_retries", -1)
+ self.data['oom'] = torch_stats.get("num_ooms", -1)
except Exception:
self.disabled = True
return self.data
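With the fallbacks above, read() no longer disables itself when a torch backend renames or omits the allocator counters; a consumer-side sketch (the mem_mon instance name is an assumption):

    data = shared.mem_mon.read()
    # free/total/used come from mem_get_info; 'active' falls back to active_bytes.all.current,
    # while 'retries' and 'oom' default to -1 when the backend does not report those counters
    print(data['used'], data['active'], data['retries'], data['oom'])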
From 964b4c9e5a6f7bdab943007edeeb3b2c828ac158 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 28 Nov 2024 09:11:42 -0500
Subject: [PATCH 028/162] euler flowmatch add sigma methods
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++-
modules/sd_samplers_diffusers.py | 20 ++++++++++----------
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3f2282c0..f88c78302 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,7 +50,8 @@
- control: add stats
- browser->server logging framework
- **Sampler** improvements
- - update DPM FlowMatch samplers
+ - Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
+ - DPM FlowMatch: update all and add sigma methods
### Fixes
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 9f24d5a91..4672df92e 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -69,7 +69,7 @@
'Euler a': { 'steps_offset': 0, 'rescale_betas_zero_snr': False, 'timestep_spacing': 'linspace' },
'Euler SGM': { 'steps_offset': 0, 'interpolation_type': "linear", 'rescale_betas_zero_snr': False, 'final_sigmas_type': 'zero', 'timestep_spacing': 'trailing', 'use_beta_sigmas': False, 'use_exponential_sigmas': False, 'use_karras_sigmas': False, 'prediction_type': "sample" },
'Euler EDM': { 'sigma_schedule': "karras" },
- 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, 'use_dynamic_shifting': False },
+ 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, 'use_dynamic_shifting': False, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False },
'DPM++': { 'solver_order': 2, 'thresholding': False, 'sample_max_value': 1.0, 'algorithm_type': "dpmsolver++", 'solver_type': "midpoint", 'lower_order_final': True, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'final_sigmas_type': 'sigma_min' },
'DPM++ 1S': { 'thresholding': False, 'sample_max_value': 1.0, 'algorithm_type': "dpmsolver++", 'solver_type': "midpoint", 'lower_order_final': True, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'use_lu_lambdas': False, 'final_sigmas_type': 'zero', 'timestep_spacing': 'linspace', 'solver_order': 1 },
@@ -200,16 +200,16 @@ def __init__(self, name, constructor, model, **kwargs):
timesteps = re.split(',| ', shared.opts.schedulers_timesteps)
timesteps = [int(x) for x in timesteps if x.isdigit()]
if len(timesteps) == 0:
- if 'use_beta_sigmas' in self.config:
- self.config['use_beta_sigmas'] = shared.opts.schedulers_sigma == 'beta'
- if 'use_karras_sigmas' in self.config:
- self.config['use_karras_sigmas'] = shared.opts.schedulers_sigma == 'karras'
- if 'use_exponential_sigmas' in self.config:
- self.config['use_exponential_sigmas'] = shared.opts.schedulers_sigma == 'exponential'
- if 'use_lu_lambdas' in self.config:
- self.config['use_lu_lambdas'] = shared.opts.schedulers_sigma == 'lambdas'
if 'sigma_schedule' in self.config:
self.config['sigma_schedule'] = shared.opts.schedulers_sigma if shared.opts.schedulers_sigma != 'default' else None
+ if shared.opts.schedulers_sigma == 'betas' and 'use_beta_sigmas' in self.config:
+ self.config['use_beta_sigmas'] = True
+ elif shared.opts.schedulers_sigma == 'karras' and 'use_karras_sigmas' in self.config:
+ self.config['use_karras_sigmas'] = True
+ elif shared.opts.schedulers_sigma == 'exponential' and 'use_exponential_sigmas' in self.config:
+ self.config['use_exponential_sigmas'] = True
+ elif shared.opts.schedulers_sigma == 'lambdas' and 'use_lu_lambdas' in self.config:
+ self.config['use_lu_lambdas'] = True
else:
pass # timesteps are set using set_timesteps in set_pipeline_args
@@ -236,7 +236,7 @@ def __init__(self, name, constructor, model, **kwargs):
if 'use_dynamic_shifting' in self.config:
if 'Flux' in model.__class__.__name__:
self.config['use_dynamic_shifting'] = shared.opts.schedulers_dynamic_shift
- if 'use_beta_sigmas' in self.config:
+ if 'use_beta_sigmas' in self.config and 'sigma_schedule' in self.config:
self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__
if 'rescale_betas_zero_snr' in self.config:
self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas
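With this change the existing global sigma setting also reaches Euler FlowMatch; assuming no custom timesteps are configured, setting it to 'karras' resolves the sampler config roughly as sketched below (illustrative, other keys unchanged):

    # shared.opts.schedulers_sigma = 'karras'
    config = {
        'timestep_spacing': 'linspace', 'shift': 1, 'use_dynamic_shifting': False,
        'use_karras_sigmas': True,        # flipped by the new elif chain
        'use_exponential_sigmas': False,  # would be flipped for 'exponential'
        'use_beta_sigmas': False,         # would be flipped for 'betas'
    }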
From 425f51bb2524914a5a8ed834d144b82cc168cc75 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 28 Nov 2024 13:28:46 -0500
Subject: [PATCH 029/162] simplify inpaint/sketch canvas handling
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 5 ++-
javascript/black-teal.css | 1 -
javascript/imageMaskFix.js | 38 ------------------
javascript/sdnext.css | 20 ++++++----
javascript/ui.js | 2 +-
modules/img2img.py | 14 +++----
modules/ui_control.py | 4 +-
modules/ui_img2img.py | 81 ++++++++++++++++++--------------------
8 files changed, 65 insertions(+), 100 deletions(-)
delete mode 100644 javascript/imageMaskFix.js
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f88c78302..919041bde 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -48,7 +48,8 @@
- control: hide preview column by default
  - control: option to hide input column
- control: add stats
- - browser->server logging framework
+ - browser -> server logging framework
+  - add additional themes: `black-reimagined`
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- DPM FlowMatch: update all and add sigma methods
@@ -65,6 +66,8 @@
- allow xyz-grid with multi-axis s&r
- fix xyz-grid with lora
- fix api script callbacks
+- fix gpu memory monitoring
+- simplify img2img/inpaint/sketch canvas handling
## Update for 2024-11-21
diff --git a/javascript/black-teal.css b/javascript/black-teal.css
index b73f9fdc7..2ebf32e96 100644
--- a/javascript/black-teal.css
+++ b/javascript/black-teal.css
@@ -108,7 +108,6 @@ fieldset .gr-block.gr-box, label.block span { padding: 0; margin-top: -4px; }
.eta-bar { display: none !important }
.gradio-slider { max-width: 200px; }
.gradio-slider input[type="number"] { background: var(--neutral-950); margin-top: 2px; }
-.gradio-image { height: unset !important; }
svg.feather.feather-image, .feather .feather-image { display: none }
.gap-2 { padding-top: 8px; }
.gr-box > div > div > input.gr-text-input { right: 0; width: 4em; padding: 0; top: -12px; border: none; max-height: 20px; }
diff --git a/javascript/imageMaskFix.js b/javascript/imageMaskFix.js
deleted file mode 100644
index fd37caf90..000000000
--- a/javascript/imageMaskFix.js
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * temporary fix for https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/668
- * @see https://github.com/gradio-app/gradio/issues/1721
- */
-function imageMaskResize() {
- const canvases = gradioApp().querySelectorAll('#img2maskimg .touch-none canvas');
- if (!canvases.length) {
- window.removeEventListener('resize', imageMaskResize);
- return;
- }
- const wrapper = canvases[0].closest('.touch-none');
- const previewImage = wrapper.previousElementSibling;
- if (!previewImage.complete) {
- previewImage.addEventListener('load', imageMaskResize);
- return;
- }
- const w = previewImage.width;
- const h = previewImage.height;
- const nw = previewImage.naturalWidth;
- const nh = previewImage.naturalHeight;
- const portrait = nh > nw;
- const wW = Math.min(w, portrait ? h / nh * nw : w / nw * nw);
- const wH = Math.min(h, portrait ? h / nh * nh : w / nw * nh);
- wrapper.style.width = `${wW}px`;
- wrapper.style.height = `${wH}px`;
- wrapper.style.left = '0px';
- wrapper.style.top = '0px';
- canvases.forEach((c) => {
- c.style.width = '';
- c.style.height = '';
- c.style.maxWidth = '100%';
- c.style.maxHeight = '100%';
- c.style.objectFit = 'contain';
- });
-}
-
-onAfterUiUpdate(imageMaskResize);
-window.addEventListener('resize', imageMaskResize);
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index 240b7492f..c5145c973 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -30,6 +30,19 @@ input::-webkit-outer-spin-button, input::-webkit-inner-spin-button { margin-left
.hidden { display: none; }
.tabitem { padding: 0 !important; }
+/* gradio image/canvas elements */
+.image-container { overflow: auto; }
+/*
+.gradio-image { min-height: fit-content; }
+.gradio-image img { object-fit: contain; }
+*/
+/*
+.gradio-image { min-height: 200px !important; }
+.image-container { height: unset !important; }
+.control-image { height: unset !important; }
+#img2img_sketch, #img2maskimg, #inpaint_sketch { overflow: overlay !important; resize: auto; background: var(--panel-background-fill); z-index: 5; }
+*/
+
/* color elements */
.gradio-dropdown, .block.gradio-slider, .block.gradio-checkbox, .block.gradio-textbox, .block.gradio-radio, .block.gradio-checkboxgroup, .block.gradio-number, .block.gradio-colorpicker { border-width: 0 !important; box-shadow: none !important;}
.gradio-accordion { padding-top: var(--spacing-md) !important; padding-right: 0 !important; padding-bottom: 0 !important; color: var(--body-text-color); }
@@ -87,8 +100,6 @@ button.custom-button { border-radius: var(--button-large-radius); padding: var(-
.performance .time { margin-right: 0; }
.thumbnails { background: var(--body-background-fill); }
.prompt textarea { resize: vertical; }
-.image-container { height: unset !important; }
-.control-image { height: unset !important; }
.grid-wrap { overflow-y: auto !important; }
#control_results { margin: 0; padding: 0; }
#txt2img_gallery, #img2img_gallery { height: 50vh; }
@@ -106,7 +117,6 @@ button.custom-button { border-radius: var(--button-large-radius); padding: var(-
#txt2img_prompt, #txt2img_neg_prompt, #img2img_prompt, #img2img_neg_prompt, #control_prompt, #control_neg_prompt { display: contents; }
#txt2img_actions_column, #img2img_actions_column, #control_actions { flex-flow: wrap; justify-content: space-between; }
-
.interrogate-clip { position: absolute; right: 6em; top: 8px; max-width: fit-content; background: none !important; z-index: 50; }
.interrogate-blip { position: absolute; right: 4em; top: 8px; max-width: fit-content; background: none !important; z-index: 50; }
.interrogate-col { min-width: 0 !important; max-width: fit-content; margin-right: var(--spacing-xxl); }
@@ -119,8 +129,6 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
#img2img_unused_scale_by_slider { visibility: hidden; width: 0.5em; max-width: 0.5em; min-width: 0.5em; }
.inactive{ opacity: 0.5; }
div#extras_scale_to_tab div.form { flex-direction: row; }
-#mode_img2img .gradio-image>div.fixed-height, #mode_img2img .gradio-image>div.fixed-height img{ height: 480px !important; max-height: 480px !important; min-height: 480px !important; }
-#img2img_sketch, #img2maskimg, #inpaint_sketch { overflow: overlay !important; resize: auto; background: var(--panel-background-fill); z-index: 5; }
.image-buttons button { min-width: auto; }
.infotext { overflow-wrap: break-word; line-height: 1.5em; font-size: 0.95em !important; }
.infotext > p { white-space: pre-wrap; color: var(--block-info-text-color) !important; }
@@ -380,8 +388,6 @@ div:has(>#tab-gallery-folders) { flex-grow: 0 !important; background-color: var(
#img2img_actions_column { display: flex; min-width: fit-content !important; flex-direction: row;justify-content: space-evenly; align-items: center;}
#txt2img_generate_box, #img2img_generate_box, #txt2img_enqueue_wrapper,#img2img_enqueue_wrapper {display: flex;flex-direction: column;height: 4em !important;align-items: stretch;justify-content: space-evenly;}
#img2img_interface, #img2img_results, #img2img_footer p { text-wrap: wrap; min-width: 100% !important; max-width: 100% !important;} /* maintain single column for from image operations on larger mobile devices */
- #img2img_sketch, #img2maskimg, #inpaint_sketch {display: flex; overflow: auto !important; resize: none !important; } /* fix inpaint image display being too large for mobile displays */
- #img2maskimg canvas { width: auto !important; max-height: 100% !important; height: auto !important; }
#txt2img_sampler, #txt2img_batch, #txt2img_seed_group, #txt2img_advanced, #txt2img_second_pass, #img2img_sampling_group, #img2img_resize_group, #img2img_batch_group, #img2img_seed_group, #img2img_denoise_group, #img2img_advanced_group { width: 100% !important; } /* fix from text/image UI elements to prevent them from moving around within the UI */
#img2img_resize_group .gradio-radio>div { display: flex; flex-direction: column; width: unset !important; }
#inpaint_controls div { display:flex;flex-direction: row;}
diff --git a/javascript/ui.js b/javascript/ui.js
index 81d1c67e4..3e3f14390 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -139,7 +139,7 @@ function switch_to_inpaint(...args) {
return Array.from(arguments);
}
-function switch_to_inpaint_sketch(...args) {
+function switch_to_composite(...args) {
switchToTab('Image');
switch_to_img2img_tab(3);
return Array.from(arguments);
diff --git a/modules/img2img.py b/modules/img2img.py
index 8274386cc..077df1259 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -164,12 +164,7 @@ def img2img(id_task: str, state: str, mode: int,
return [], '', '', 'Error: init image not provided'
image = init_img.convert("RGB")
mask = None
- elif mode == 1: # img2img sketch
- if sketch is None:
- return [], '', '', 'Error: sketch image not provided'
- image = sketch.convert("RGB")
- mask = None
- elif mode == 2: # inpaint
+ elif mode == 1: # inpaint
if init_img_with_mask is None:
return [], '', '', 'Error: init image with mask not provided'
image = init_img_with_mask["image"]
@@ -177,7 +172,12 @@ def img2img(id_task: str, state: str, mode: int,
alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1')
mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L')
image = image.convert("RGB")
- elif mode == 3: # inpaint sketch
+ elif mode == 2: # sketch
+ if sketch is None:
+ return [], '', '', 'Error: sketch image not provided'
+ image = sketch.convert("RGB")
+ mask = None
+ elif mode == 3: # composite
if inpaint_color_sketch is None:
return [], '', '', 'Error: color sketch image not provided'
image = inpaint_color_sketch
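The hunk above reorders the img2img modes so the integer index follows the new tab order (Image, Inpaint, Sketch, Composite). A tiny sketch of the resulting mapping, for orientation only; `describe_mode` is an illustrative helper and the real code branches on the integer directly:

```python
def describe_mode(mode: int) -> str:
    names = {0: 'img2img', 1: 'inpaint', 2: 'sketch', 3: 'composite'}
    return names.get(mode, 'other (upload/batch, unchanged by this patch)')

assert describe_mode(1) == 'inpaint'  # previously mode 1 was the sketch tab
assert describe_mode(2) == 'sketch'   # previously mode 2 was inpaint
```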
diff --git a/modules/ui_control.py b/modules/ui_control.py
index f4329663a..072d9b9c9 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -13,7 +13,7 @@
from modules import ui_control_helpers as helpers
-gr_height = None
+gr_height = 512
max_units = shared.opts.control_max_units
units: list[unit.Unit] = [] # main state variable
controls: list[gr.component] = [] # list of gr controls
@@ -135,7 +135,7 @@ def create_ui(_blocks: gr.Blocks=None):
with gr.Row():
input_type = gr.Radio(label="Input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
with gr.Row():
- denoising_strength = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Denoising strength', value=0.50, elem_id="control_input_denoising_strength")
+ denoising_strength = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Denoising strength', value=0.30, elem_id="control_input_denoising_strength")
with gr.Accordion(open=False, label="Size", elem_id="control_size", elem_classes=["small-accordion"]):
with gr.Tabs():
diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py
index 046c181ce..3c3d63656 100644
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@@ -1,7 +1,6 @@
import os
from PIL import Image
import gradio as gr
-import numpy as np
from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call
from modules import timer, shared, ui_common, ui_sections, generation_parameters_copypaste, processing_vae
@@ -56,7 +55,7 @@ def copy_image(img):
def add_copy_image_controls(tab_name, elem):
with gr.Row(variant="compact", elem_id=f"img2img_copy_to_{tab_name}"):
- for title, name in zip(['➠ Image', '➠ Sketch', '➠ Inpaint', '➠ Composite'], ['img2img', 'sketch', 'inpaint', 'inpaint_sketch']):
+ for title, name in zip(['➠ Image', '➠ Inpaint', '➠ Sketch', '➠ Composite'], ['img2img', 'sketch', 'inpaint', 'composite']):
if name == tab_name:
gr.Button(title, elem_id=f'copy_to_{name}', interactive=False)
copy_image_destinations[name] = elem
@@ -67,33 +66,36 @@ def add_copy_image_controls(tab_name, elem):
with gr.Tabs(elem_id="mode_img2img"):
img2img_selected_tab = gr.State(0) # pylint: disable=abstract-class-instantiated
state = gr.Textbox(value='', visible=False)
- with gr.TabItem('Image', id='img2img', elem_id="img2img_img2img_tab") as tab_img2img:
- init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA")
+ with gr.TabItem('Image', id='img2img_image', elem_id="img2img_image_tab") as tab_img2img:
+ img_init = gr.Image(label="", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512)
interrogate_clip, interrogate_booru = ui_sections.create_interrogate_buttons('img2img')
- add_copy_image_controls('img2img', init_img)
-
- with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_img2img_sketch_tab") as tab_sketch:
- sketch = gr.Image(label="Image for img2img", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
- add_copy_image_controls('sketch', sketch)
-
- with gr.TabItem('Inpaint', id='inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
- init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA")
- add_copy_image_controls('inpaint', init_img_with_mask)
-
- with gr.TabItem('Composite', id='inpaint_sketch', elem_id="img2img_inpaint_sketch_tab") as tab_inpaint_color:
- inpaint_color_sketch = gr.Image(label="Color sketch inpainting", show_label=False, elem_id="inpaint_sketch", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
- inpaint_color_sketch_orig = gr.State(None) # pylint: disable=abstract-class-instantiated
- add_copy_image_controls('inpaint_sketch', inpaint_color_sketch)
-
- def update_orig(image, state):
- if image is not None:
- same_size = state is not None and state.size == image.size
- has_exact_match = np.any(np.all(np.array(image) == np.array(state), axis=-1))
- edited = same_size and has_exact_match
- return image if not edited or state is None else state
- return state
-
- inpaint_color_sketch.change(update_orig, [inpaint_color_sketch, inpaint_color_sketch_orig], inpaint_color_sketch_orig)
+ add_copy_image_controls('img2img', img_init)
+
+ with gr.TabItem('Inpaint', id='img2img_inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
+ img_inpaint = gr.Image(label="", elem_id="img2img_inpaint", show_label=False, source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA", height=512)
+ add_copy_image_controls('inpaint', img_inpaint)
+
+ with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_sketch_tab") as tab_sketch:
+ img_sketch = gr.Image(label="", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ add_copy_image_controls('sketch', img_sketch)
+
+ with gr.TabItem('Composite', id='img2img_composite', elem_id="img2img_composite_tab") as tab_inpaint_color:
+ img_composite = gr.Image(label="", show_label=False, elem_id="img2img_composite", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ img_composite_orig = gr.State(None) # pylint: disable=abstract-class-instantiated
+ img_composite_orig_update = False
+
+ def fn_img_composite_upload():
+ nonlocal img_composite_orig_update
+ img_composite_orig_update = True
+ def fn_img_composite_change(img, img_composite):
+ nonlocal img_composite_orig_update
+ res = img if img_composite_orig_update else img_composite
+ img_composite_orig_update = False
+ return res
+
+ img_composite.upload(fn=fn_img_composite_upload, inputs=[], outputs=[])
+ img_composite.change(fn=fn_img_composite_change, inputs=[img_composite, img_composite_orig], outputs=[img_composite_orig])
+ add_copy_image_controls('composite', img_composite)
with gr.TabItem('Upload', id='inpaint_upload', elem_id="img2img_inpaint_upload_tab") as tab_inpaint_upload:
init_img_inpaint = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil", elem_id="img_inpaint_base")
@@ -120,13 +122,13 @@ def update_orig(image, state):
with gr.Accordion(open=False, label="Sampler", elem_classes=["small-accordion"], elem_id="img2img_sampler_group"):
steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "img2img")
ui_sections.create_sampler_options('img2img')
- resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True, non_zero=False)
+ resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [img_init, img_sketch], latent=True, non_zero=False)
batch_count, batch_size = ui_sections.create_batch_inputs('img2img', accordion=True)
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('img2img')
with gr.Accordion(open=False, label="Denoise", elem_classes=["small-accordion"], elem_id="img2img_denoise_group"):
with gr.Row():
- denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.50, elem_id="img2img_denoising_strength")
+ denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.30, elem_id="img2img_denoising_strength")
refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="img2img_refiner_start")
full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
@@ -167,13 +169,8 @@ def select_img2img_tab(tab):
img2img_args = [
dummy_component1, state, dummy_component2,
img2img_prompt, img2img_negative_prompt, img2img_prompt_styles,
- init_img,
- sketch,
- init_img_with_mask,
- inpaint_color_sketch,
- inpaint_color_sketch_orig,
- init_img_inpaint,
- init_mask_inpaint,
+ img_init, img_sketch, img_inpaint, img_composite, img_composite_orig,
+ init_img_inpaint, init_mask_inpaint,
steps,
sampler_index,
mask_blur, mask_alpha,
@@ -225,10 +222,7 @@ def select_img2img_tab(tab):
img2img_batch_files,
img2img_batch_input_dir,
img2img_batch_output_dir,
- init_img,
- sketch,
- init_img_with_mask,
- inpaint_color_sketch,
+ img_init, img_sketch, img_inpaint, img_composite,
init_img_inpaint,
],
outputs=[img2img_prompt, dummy_component],
@@ -285,7 +279,8 @@ def select_img2img_tab(tab):
(seed_resize_from_h, "Seed resize from-2"),
*modules.scripts.scripts_img2img.infotext_fields
]
- generation_parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings)
- generation_parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields, override_settings)
+ generation_parameters_copypaste.add_paste_fields("img2img", img_init, img2img_paste_fields, override_settings)
+ generation_parameters_copypaste.add_paste_fields("sketch", img_sketch, img2img_paste_fields, override_settings)
+ generation_parameters_copypaste.add_paste_fields("inpaint", img_inpaint, img2img_paste_fields, override_settings)
img2img_bindings = generation_parameters_copypaste.ParamBinding(paste_button=img2img_paste, tabname="img2img", source_text_component=img2img_prompt, source_image_component=None)
generation_parameters_copypaste.register_paste_params_button(img2img_bindings)
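The Composite tab above replaces the numpy-based `update_orig` comparison with an upload flag: the stored original is refreshed only when the change event came from a fresh upload, not from in-canvas sketching. A framework-free sketch of that pattern, assuming nothing about gradio's event API:

```python
def make_composite_state():
    state = {'orig': None, 'from_upload': False}

    def on_upload():                     # mirrors img_composite.upload(...)
        state['from_upload'] = True

    def on_change(new_image):            # mirrors img_composite.change(...)
        if state['from_upload']:
            state['orig'] = new_image    # fresh upload becomes the new original
        state['from_upload'] = False     # in-canvas edits leave the original untouched
        return state['orig']

    return on_upload, on_change

on_upload, on_change = make_composite_state()
on_upload()
print(on_change('uploaded.png'))   # -> 'uploaded.png' (stored as original)
print(on_change('sketched.png'))   # -> 'uploaded.png' (original preserved)
```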
From 75dd6219c6b715159a1166f189a74cd76b1ce1b3 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Thu, 28 Nov 2024 22:12:25 +0300
Subject: [PATCH 030/162] Fix Cascade and add full_vqgan_decode
---
modules/processing_args.py | 2 +-
modules/processing_diffusers.py | 2 +-
modules/processing_vae.py | 64 ++++++++++++++++++++++++++++++++-
3 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/modules/processing_args.py b/modules/processing_args.py
index ff766ec04..a716b685e 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -135,7 +135,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
prompts = [p.replace('|image|', '<|image_1|>') for p in prompts]
if hasattr(model, 'text_encoder') and hasattr(model, 'tokenizer') and 'prompt_embeds' in possible and prompt_parser_diffusers.embedder is not None:
args['prompt_embeds'] = prompt_parser_diffusers.embedder('prompt_embeds')
- if 'StableCascade' in model.__class__.__name__ and len(getattr(p, 'negative_pooleds', [])) > 0:
+ if 'StableCascade' in model.__class__.__name__ and prompt_parser_diffusers.embedder is not None:
args['prompt_embeds_pooled'] = prompt_parser_diffusers.embedder('positive_pooleds').unsqueeze(0)
elif 'XL' in model.__class__.__name__ and prompt_parser_diffusers.embedder is not None:
args['pooled_prompt_embeds'] = prompt_parser_diffusers.embedder('positive_pooleds')
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 7b91fcd42..a278f980e 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -352,7 +352,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
if not hasattr(model, 'vae'):
if hasattr(model, 'pipe') and hasattr(model.pipe, 'vae'):
model = model.pipe
- if hasattr(model, "vae") and output.images is not None and len(output.images) > 0:
+ if (hasattr(model, "vae") or hasattr(model, "vqgan")) and output.images is not None and len(output.images) > 0:
if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0))
height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0))
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 3c0357c81..1c4a45f07 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -33,6 +33,62 @@ def create_latents(image, p, dtype=None, device=None):
return latents
+def full_vqgan_decode(latents, model):
+ t0 = time.time()
+ if model is None or not hasattr(model, 'vqgan'):
+ shared.log.error('VQGAN not found in model')
+ return []
+ if debug:
+ devices.torch_gc(force=True)
+ shared.mem_mon.reset()
+
+ base_device = None
+ if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False):
+ base_device = sd_models.move_base(model, devices.cpu)
+
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ elif shared.opts.diffusers_offload_mode != "sequential":
+ sd_models.move_model(model.vqgan, devices.device)
+
+ latents = latents.to(devices.device, dtype=model.vqgan.dtype)
+
+ #normalize latents
+ scaling_factor = model.vqgan.config.get("scale_factor", None)
+ if scaling_factor:
+ latents = latents * scaling_factor
+
+ vae_name = os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0] if sd_vae.loaded_vae_file is not None else "default"
+ vae_stats = f'name="{vae_name}" dtype={model.vqgan.dtype} device={model.vqgan.device}'
+ latents_stats = f'shape={latents.shape} dtype={latents.dtype} device={latents.device}'
+ stats = f'vae {vae_stats} latents {latents_stats}'
+
+ log_debug(f'VAE config: {model.vqgan.config}')
+ try:
+ decoded = model.vqgan.decode(latents).sample.clamp(0, 1)
+ except Exception as e:
+ shared.log.error(f'VAE decode: {stats} {e}')
+ errors.display(e, 'VAE decode')
+ decoded = []
+
+ # delete vae after OpenVINO compile
+ if 'VAE' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae:
+ shared.compiled_model_state.first_pass_vae = False
+ if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vqgan"):
+ model.vqgan.apply(sd_models.convert_to_faketensors)
+ devices.torch_gc(force=True)
+
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ elif shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and base_device is not None:
+ sd_models.move_base(model, base_device)
+ t1 = time.time()
+ if debug:
+ log_debug(f'VAE memory: {shared.mem_mon.read()}')
+ shared.log.debug(f'VAE decode: {stats} time={round(t1-t0, 3)}')
+ return decoded
+
+
def full_vae_decode(latents, model):
t0 = time.time()
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@@ -161,7 +217,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
return []
if shared.state.interrupted or shared.state.skipped:
return []
- if not hasattr(model, 'vae'):
+ if not hasattr(model, 'vae') and not hasattr(model, 'vqgan'):
shared.log.error('VAE not found in model')
return []
@@ -176,12 +232,18 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
decoded = latents.float().cpu().numpy()
elif full_quality and hasattr(model, "vae"):
decoded = full_vae_decode(latents=latents, model=model)
+ elif hasattr(model, "vqgan"):
+ decoded = full_vqgan_decode(latents=latents, model=model)
else:
decoded = taesd_vae_decode(latents=latents)
if torch.is_tensor(decoded):
if hasattr(model, 'image_processor'):
imgs = model.image_processor.postprocess(decoded, output_type=output_type)
+ elif hasattr(model, "vqgan"):
+ imgs = decoded.permute(0, 2, 3, 1).cpu().float().numpy()
+ if output_type == "pil":
+ imgs = model.numpy_to_pil(imgs)
else:
import diffusers
model.image_processor = diffusers.image_processor.VaeImageProcessor()
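Putting the new `full_vqgan_decode` path in context, a rough sketch of the decode dispatch in `vae_decode` after this patch; `model` is a stand-in object and the returned strings name the real functions. Pipelines such as Stable Cascade expose a `vqgan` attribute instead of `vae`, which is what the new branch targets:

```python
def pick_decoder(model, full_quality: bool = True) -> str:
    if full_quality and hasattr(model, 'vae'):
        return 'full_vae_decode'
    if hasattr(model, 'vqgan'):        # Cascade-style pipelines carry a VQ model instead of a VAE
        return 'full_vqgan_decode'
    return 'taesd_vae_decode'

class CascadeLike:
    vqgan = object()                   # stand-in for the pipeline's VQ model

print(pick_decoder(CascadeLike()))     # -> full_vqgan_decode
```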
From 1f39d718f9f141c431fabce6bede20839aa22863 Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 08:58:22 +0100
Subject: [PATCH 031/162] Small changes to black-teal-reimagined
---
javascript/black-teal-reimagined.css | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index e5618c02c..94fccdea9 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -813,10 +813,18 @@ textarea[rows="1"] {
background: var(--background-color);
box-shadow: var(--shadow-md);
border-radius: var(--radius-lg);
+ transform: translateX(100%);
+ animation: slideIn 0.5s forwards;
overflow: hidden;
/* Prevents overflow of content */
}
+@keyframes slideIn {
+ to {
+ transform: translateX(0);
+ }
+}
+
/* Extra Networks Styles */
.extra-networks {
background: var(--background-color);
@@ -1032,6 +1040,20 @@ textarea[rows="1"] {
height: 100%;
}
+/* Token counters styling */
+
+#txt2img_token_counter, #txt2img_negative_token_counter {
+ display: flex;
+ flex-direction: column;
+ justify-content: space-evenly;
+ padding: 10px;
+}
+
+#txt2img_prompt_container {
+ margin: 5px;
+ padding: 0px;
+}
+
/* Based on Gradio Built-in Dark Theme */
:root,
.light,
From 3e3501218b0179713f2eb92e4e59f5aeafa8349f Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 10:01:53 +0100
Subject: [PATCH 032/162] Removed redundant css
---
javascript/black-teal-reimagined.css | 36 ++++------------------------
1 file changed, 4 insertions(+), 32 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 94fccdea9..70315782d 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -655,27 +655,6 @@ svg.feather.feather-image,
font-weight: normal;
}
-#txt2img_prompt,
-#txt2img_neg_prompt,
-#img2img_prompt,
-#img2img _neg_prompt,
-#control_prompt,
-#control_neg_prompt {
- background-color: var(--background-color);
- box-shadow: none !important;
-}
-
-#txt2img_prompt>label>textarea,
-#txt2img_neg_prompt>label>textarea,
-#img2img_prompt>label>textarea,
-#img2img_neg_prompt>label>textarea,
-#control_prompt>label>textarea,
-#control_neg_prompt>label>textarea {
- font-size: 1.0em;
- line-height: 1.4em;
- border-radius: var(--radius-md);
-}
-
#txt2img_styles,
#img2img_styles,
#control_styles {
@@ -746,11 +725,6 @@ svg.feather.feather-image,
margin-left: 1em;
}
-#settings_search textarea {
- padding: 0.5em;
- height: 2.2em !important;
-}
-
#txt2img_cfg_scale {
min-width: 200px;
}
@@ -762,12 +736,6 @@ svg.feather.feather-image,
margin-bottom: 0.2em;
}
-textarea[rows="1"] {
- height: 33px !important;
- width: 99% !important;
- padding: 8px !important;
-}
-
#extras_upscale {
margin-top: 10px;
}
@@ -1054,6 +1022,10 @@ textarea[rows="1"] {
padding: 0px;
}
+#text2img_prompt label, #text2img_neg_prompt label {
+ margin: 0px;
+}
+
/* Based on Gradio Built-in Dark Theme */
:root,
.light,
From 73dbdbbddc2e4adffb5d91d9b0805e0cf5046edc Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 10:58:40 +0100
Subject: [PATCH 033/162] Rearrange forms and tab-nav
---
javascript/black-teal-reimagined.css | 34 ++++++++++++----------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 70315782d..9e7d357a4 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -228,16 +228,17 @@ input[type='range']::-moz-range-track {
.tab-nav {
display: flex;
/* Use flexbox for layout */
- justify-content: space-around;
+ justify-content: space-evenly;
/* Space out the tabs evenly */
align-items: center;
/* Center items vertically */
background: var(--background-color);
/* Background color */
- border-bottom: 1px solid var(--highlight-color) !important;
+ border-bottom: 3px solid var(--highlight-color) !important;
/* Bottom border for separation */
box-shadow: var(--shadow-md);
/* Shadow for depth */
+ margin-bottom: 5px;
}
/* Individual Tab Styles */
@@ -246,19 +247,24 @@ input[type='range']::-moz-range-track {
/* No background for default state */
color: var(--text-color);
/* Text color */
- border: none;
+ border: 1px solid var(--highlight-color);
/* No border */
- border-radius: var(--radius-xxxl);
+ border-radius: var(--radius-xxl);
/* Rounded corners */
cursor: pointer;
/* Pointer cursor */
transition: background 0.3s ease, color 0.3s ease;
/* Smooth transition */
+ padding-top: 5px;
+ padding-bottom: 5px;
+ padding-right: 10px;
+ padding-left: 10px;
+ margin-bottom: 3px;
}
/* Active Tab Style */
-.tab-nav>button.active {
- background: var(--highlight-color);
+.tab-nav>button.selected {
+ background: var(--primary-100);
/* Highlight active tab */
color: var(--background-color);
/* Change text color for active tab */
@@ -386,7 +392,8 @@ div.form {
border-width: 0;
box-shadow: var(--shadow-md);
background: var(--background-fill-primary);
- padding: 16px;
+ border-bottom: 3px solid var(--highlight-color);
+ padding: 3px;
border-radius: var(--radius-md);
}
@@ -720,11 +727,6 @@ svg.feather.feather-image,
width: 15em;
}
-#settings_search {
- margin-top: 1em;
- margin-left: 1em;
-}
-
#txt2img_cfg_scale {
min-width: 200px;
}
@@ -749,7 +751,6 @@ svg.feather.feather-image,
min-width: var(--left-column);
max-width: var(--left-column);
background-color: var(--neutral-950);
- padding-top: 16px;
}
#pnginfo_html2_info {
@@ -837,11 +838,6 @@ svg.feather.feather-image,
background: var(--highlight-color);
}
-/* Extra Networks Tab */
-.extra-networks-tab {
- padding: 0 !important;
-}
-
/* Subdirectories Styles */
.extra-network-subdirs {
background: var(--input-background-fill);
@@ -1014,7 +1010,7 @@ svg.feather.feather-image,
display: flex;
flex-direction: column;
justify-content: space-evenly;
- padding: 10px;
+ padding: 5px;
}
#txt2img_prompt_container {
From e3704ba057bbeeb58b63b107b0feb4426bf6a5bf Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 11:23:07 +0100
Subject: [PATCH 034/162] small changes
---
extensions-builtin/sdnext-modernui | 2 +-
javascript/black-teal-reimagined.css | 44 ++++------------------------
2 files changed, 7 insertions(+), 39 deletions(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 3008cee4b..f083ce41a 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
+Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 9e7d357a4..eb6942b8c 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -216,6 +216,7 @@ input[type='range']::-moz-range-track {
::-webkit-scrollbar-track {
background: var(--scrollbar-bg);
+ border-radius: var(--radius-lg);
}
::-webkit-scrollbar-thumb {
@@ -234,11 +235,14 @@ input[type='range']::-moz-range-track {
/* Center items vertically */
background: var(--background-color);
/* Background color */
- border-bottom: 3px solid var(--highlight-color) !important;
+ border-bottom: 1px dashed var(--highlight-color) !important;
/* Bottom border for separation */
box-shadow: var(--shadow-md);
/* Shadow for depth */
margin-bottom: 5px;
+ /* Add some space between the tab nav and the content */
+ padding-bottom: 5px;
+ /* Add space between buttons and border */
}
/* Individual Tab Styles */
@@ -395,6 +399,7 @@ div.form {
border-bottom: 3px solid var(--highlight-color);
padding: 3px;
border-radius: var(--radius-md);
+ margin: 1px;
}
/* Gradio Style Classes */
@@ -772,12 +777,6 @@ svg.feather.feather-image,
/* Extra Networks Container */
#extra_networks_root {
- width: 300px;
- /* Set a fixed width for the sidebar */
- position: absolute;
- height: auto;
- right: 0;
- top: 13em;
z-index: 100;
background: var(--background-color);
box-shadow: var(--shadow-md);
@@ -797,15 +796,6 @@ svg.feather.feather-image,
/* Extra Networks Styles */
.extra-networks {
background: var(--background-color);
- padding: var(--block-label-padding);
- border-radius: var(--radius-lg);
-}
-
-/* Extra Networks Div Styles */
-.extra-networks>div {
- margin: 0;
- border-bottom: none !important;
- gap: 0.3em 0;
}
.extra-networks .tab-nav>button:hover {
@@ -822,32 +812,10 @@ svg.feather.feather-image,
margin-top: 50px;
}
-/* Individual Buttons */
-.extra-networks .buttons>button {
- margin-left: -0.2em;
- height: 1.4em;
- color: var(--primary-300) !important;
- font-size: 20px !important;
- background: var(--button-primary-background-fill);
- border: none;
- border-radius: var(--radius-sm);
- transition: var(--transition);
-}
-
.extra-networks .buttons>button:hover {
background: var(--highlight-color);
}
-/* Subdirectories Styles */
-.extra-network-subdirs {
- background: var(--input-background-fill);
- overflow-x: hidden;
- overflow-y: auto;
- min-width: 120px;
- padding-top: 0.5em;
- margin-top: -4px !important;
-}
-
/* Extra Networks Page */
.extra-networks-page {
display: flex;
From fd5df851ad5dff22167f02876dd22fb97e1bbf4c Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 12:14:24 +0100
Subject: [PATCH 035/162] networks page rework
---
javascript/black-teal-reimagined.css | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index eb6942b8c..b1e840348 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -816,11 +816,6 @@ svg.feather.feather-image,
background: var(--highlight-color);
}
-/* Extra Networks Page */
-.extra-networks-page {
- display: flex;
-}
-
/* Network Cards Container */
.extra-network-cards {
display: flex;
@@ -828,6 +823,8 @@ svg.feather.feather-image,
overflow-y: auto;
overflow-x: hidden;
align-content: flex-start;
+ padding-top: 20px;
+ justify-content: center;
width: 100%;
/* Ensures it takes full width */
}
@@ -872,16 +869,23 @@ svg.feather.feather-image,
box-shadow: var(--button-shadow);
min-height: 30px;
border-radius: var(--radius-md);
+ z-index: 9999;
}
/* Hover Effects */
+.extra-network-cards .card:hover {
+ transform: scale(1.3);
+ z-index: 9999; /* Use a high value to ensure it appears on top */
+ transition: transform 0.3s ease, z-index 0s; /* Smooth transition */
+}
+
.extra-network-cards .card:hover .overlay {
- background: rgba(0, 0, 0, 0.70);
+ z-index: 10000; /* Ensure overlay is also on top */
}
.extra-network-cards .card:hover .preview {
box-shadow: none;
- filter: grayscale(100%);
+ filter: grayscale(0%);
}
/* Tags Styles */
@@ -913,6 +917,15 @@ svg.feather.feather-image,
font-size: 34px !important;
}
+.extra-network-cards .card .actions {
+ background: none;
+}
+
+.extra-network-cards .card .actions .details {
+ bottom: 50px;
+ background-color: var(--neutral-800);
+}
+
.extra-network-cards .card .actions>span:hover {
color: var(--highlight-color);
}
From c76619aa18f7f60d42226aa529b3fde711d4dc94 Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 12:24:19 +0100
Subject: [PATCH 036/162] mini changes
---
javascript/black-teal-reimagined.css | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index b1e840348..b7567ce75 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -631,11 +631,6 @@ svg.feather.feather-image,
color: #888;
}
-.extra-networks {
- border-left: 2px solid var(--highlight-color) !important;
- padding-left: 4px;
-}
-
.image-buttons {
justify-content: center;
gap: 0 !important;
@@ -795,7 +790,8 @@ svg.feather.feather-image,
/* Extra Networks Styles */
.extra-networks {
- background: var(--background-color);
+ border-left: 2px solid var(--highlight-color) !important;
+ padding-left: 4px;
}
.extra-networks .tab-nav>button:hover {
From b74166f9cb878c087a857ba29db13ced3c6333ca Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 07:18:05 -0500
Subject: [PATCH 037/162] detailer add augment setting
Signed-off-by: Vladimir Mandic
---
modules/devices.py | 2 +-
modules/postprocess/yolo.py | 2 +-
modules/shared.py | 9 +++++----
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/modules/devices.py b/modules/devices.py
index 56ac50091..9ca1863a5 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -471,7 +471,7 @@ def set_cuda_params():
device_name = get_raw_openvino_device()
else:
device_name = torch.device(get_optimal_device_name())
- log.info(f'Torch parameters: backend={backend} device={device_name} config={opts.cuda_dtype} dtype={dtype} vae={dtype_vae} unet={dtype_unet} context={inference_context.__name__} nohalf={opts.no_half} nohalfvae={opts.no_half_vae} upscast={opts.upcast_sampling} deterministic={opts.cudnn_deterministic} test-fp16={fp16_ok} test-bf16={bf16_ok} optimization="{opts.cross_attention_optimization}"')
+ log.info(f'Torch parameters: backend={backend} device={device_name} config={opts.cuda_dtype} dtype={dtype} vae={dtype_vae} unet={dtype_unet} context={inference_context.__name__} nohalf={opts.no_half} nohalfvae={opts.no_half_vae} upcast={opts.upcast_sampling} deterministic={opts.cudnn_deterministic} test-fp16={fp16_ok} test-bf16={bf16_ok} optimization="{opts.cross_attention_optimization}"')
def cond_cast_unet(tensor):
diff --git a/modules/postprocess/yolo.py b/modules/postprocess/yolo.py
index f42b6bb9f..5deab1282 100644
--- a/modules/postprocess/yolo.py
+++ b/modules/postprocess/yolo.py
@@ -72,7 +72,7 @@ def predict(
imgsz: int = 640,
half: bool = True,
device = devices.device,
- augment: bool = True,
+ augment: bool = shared.opts.detailer_augment,
agnostic: bool = False,
retina: bool = False,
mask: bool = True,
diff --git a/modules/shared.py b/modules/shared.py
index 5b54a0de2..720819135 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -824,7 +824,7 @@ def get_default_modes():
'postprocessing_enable_in_main_ui': OptionInfo([], "Additional postprocessing operations", gr.Dropdown, lambda: {"multiselect":True, "choices": [x.name for x in shared_items.postprocessing_scripts()]}),
'postprocessing_operation_order': OptionInfo([], "Postprocessing operation order", gr.Dropdown, lambda: {"multiselect":True, "choices": [x.name for x in shared_items.postprocessing_scripts()]}),
- "postprocessing_sep_img2img": OptionInfo("Img2Img & Inpainting", "", gr.HTML),
+ "postprocessing_sep_img2img": OptionInfo("Inpaint", "", gr.HTML),
"img2img_color_correction": OptionInfo(False, "Apply color correction"),
"mask_apply_overlay": OptionInfo(True, "Apply mask as overlay"),
"img2img_background_color": OptionInfo("#ffffff", "Image transparent color fill", gr.ColorPicker, {}),
@@ -832,7 +832,7 @@ def get_default_modes():
"initial_noise_multiplier": OptionInfo(1.0, "Noise multiplier for image processing", gr.Slider, {"minimum": 0.1, "maximum": 1.5, "step": 0.01, "visible": not native}),
"img2img_extra_noise": OptionInfo(0.0, "Extra noise multiplier for img2img", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": not native}),
- # "postprocessing_sep_detailer": OptionInfo("Detailer", "", gr.HTML),
+ "postprocessing_sep_detailer": OptionInfo("Detailer", "", gr.HTML),
"detailer_model": OptionInfo("Detailer", "Detailer model", gr.Radio, lambda: {"choices": [x.name() for x in detailers], "visible": False}),
"detailer_classes": OptionInfo("", "Detailer classes", gr.Textbox, { "visible": False}),
"detailer_conf": OptionInfo(0.6, "Min confidence", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05, "visible": False}),
@@ -844,11 +844,12 @@ def get_default_modes():
"detailer_blur": OptionInfo(10, "Item edge blur", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1, "visible": False}),
"detailer_strength": OptionInfo(0.5, "Detailer strength", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}),
"detailer_models": OptionInfo(['face-yolo8n'], "Detailer models", gr.Dropdown, lambda: {"multiselect":True, "choices": list(yolo.list), "visible": False}),
- "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}),
"detailer_unload": OptionInfo(False, "Move detailer model to CPU when complete"),
+ "detailer_augment": OptionInfo(True, "Detailer use model augment"),
"postprocessing_sep_face_restore": OptionInfo("Face restore
", "", gr.HTML),
- "face_restoration_model": OptionInfo("Face restorer", "Face restoration", gr.Radio, lambda: {"choices": ['None'] + [x.name() for x in face_restorers]}),
+ "face_restoration_model": OptionInfo("None", "Face restoration", gr.Radio, lambda: {"choices": ['None'] + [x.name() for x in face_restorers]}),
+ "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}),
"postprocessing_sep_upscalers": OptionInfo("Upscaling
", "", gr.HTML),
"upscaler_unload": OptionInfo(False, "Unload upscaler after processing"),
From f2d5307c54cae2157273c47adfff302c9ed06a4d Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 07:49:25 -0500
Subject: [PATCH 038/162] update modernui reference
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index f083ce41a..3008cee4b 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
+Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
From a635421231743e0f07f4005dd83ef357f4ee0b42 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 08:01:26 -0500
Subject: [PATCH 039/162] lint fixes
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
modules/lora/lora.py | 8 --------
modules/lora/lora_convert.py | 17 +++++++++--------
modules/lora/network.py | 6 ++++--
modules/lora/network_norm.py | 1 +
modules/lora/network_oft.py | 3 ++-
modules/lora/networks.py | 14 +++++++++-----
7 files changed, 26 insertions(+), 25 deletions(-)
delete mode 100644 modules/lora/lora.py
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 3008cee4b..f083ce41a 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
+Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
diff --git a/modules/lora/lora.py b/modules/lora/lora.py
deleted file mode 100644
index 33adfe05c..000000000
--- a/modules/lora/lora.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# import networks
-#
-# list_available_loras = networks.list_available_networks
-# available_loras = networks.available_networks
-# available_lora_aliases = networks.available_network_aliases
-# available_lora_hash_lookup = networks.available_network_hash_lookup
-# forbidden_lora_aliases = networks.forbidden_network_aliases
-# loaded_loras = networks.loaded_networks
diff --git a/modules/lora/lora_convert.py b/modules/lora/lora_convert.py
index 6bf563125..dc86a24cf 100644
--- a/modules/lora/lora_convert.py
+++ b/modules/lora/lora_convert.py
@@ -107,14 +107,14 @@ def make_unet_conversion_map() -> Dict[str, str]:
class KeyConvert:
def __init__(self):
- self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
- self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
- self.LORA_PREFIX_UNET = "lora_unet_"
- self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
- self.OFT_PREFIX_UNET = "oft_unet_"
- # SDXL: must starts with LORA_PREFIX_TEXT_ENCODER
- self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
- self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"
+ self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
+ self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
+ self.LORA_PREFIX_UNET = "lora_unet_"
+ self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
+ self.OFT_PREFIX_UNET = "oft_unet_"
+        # SDXL: must start with LORA_PREFIX_TEXT_ENCODER
+ self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
+ self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"
def __call__(self, key):
if self.is_sdxl:
@@ -446,6 +446,7 @@ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
lora_name_alpha = f"{lora_name}.alpha"
diffusers_name = _convert_text_encoder_lora_key(key, lora_name)
+ sd_lora_rank = 1
if lora_name.startswith(("lora_te_", "lora_te1_")):
down_weight = sds_sd.pop(key)
sd_lora_rank = down_weight.shape[0]
diff --git a/modules/lora/network.py b/modules/lora/network.py
index 0785ef9f4..8e6f87368 100644
--- a/modules/lora/network.py
+++ b/modules/lora/network.py
@@ -1,9 +1,11 @@
import os
-from collections import namedtuple
import enum
+from typing import Union
+from collections import namedtuple
from modules import sd_models, hashes, shared
+
NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
@@ -105,7 +107,7 @@ def __init__(self, name, network_on_disk: NetworkOnDisk):
class ModuleType:
- def create_module(self, net: Network, weights: NetworkWeights) -> Network | None: # pylint: disable=W0613
+ def create_module(self, net: Network, weights: NetworkWeights) -> Union[Network, None]: # pylint: disable=W0613
return None
diff --git a/modules/lora/network_norm.py b/modules/lora/network_norm.py
index e8f1740e3..5d059e92e 100644
--- a/modules/lora/network_norm.py
+++ b/modules/lora/network_norm.py
@@ -1,5 +1,6 @@
import modules.lora.network as network
+
class ModuleTypeNorm(network.ModuleType):
def create_module(self, net: network.Network, weights: network.NetworkWeights):
if all(x in weights.w for x in ["w_norm", "b_norm"]):
diff --git a/modules/lora/network_oft.py b/modules/lora/network_oft.py
index 808286066..e2e61ad45 100644
--- a/modules/lora/network_oft.py
+++ b/modules/lora/network_oft.py
@@ -1,7 +1,7 @@
import torch
+from einops import rearrange
import modules.lora.network as network
from modules.lora.lyco_helpers import factorization
-from einops import rearrange
class ModuleTypeOFT(network.ModuleType):
@@ -10,6 +10,7 @@ def create_module(self, net: network.Network, weights: network.NetworkWeights):
return NetworkModuleOFT(net, weights)
return None
+
# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
class NetworkModuleOFT(network.NetworkModule): # pylint: disable=abstract-method
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index c6fde3e04..737623b1e 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -3,6 +3,9 @@
import re
import time
import concurrent
+import torch
+import diffusers.models.lora
+
import modules.lora.network as network
import modules.lora.network_lora as network_lora
import modules.lora.network_hada as network_hada
@@ -14,8 +17,6 @@
import modules.lora.network_glora as network_glora
import modules.lora.network_overrides as network_overrides
import modules.lora.lora_convert as lora_convert
-import torch
-import diffusers.models.lora
from modules import shared, devices, sd_models, sd_models_compile, errors, scripts, files_cache, model_quant
@@ -74,7 +75,7 @@ def assign_network_names_to_compvis_modules(sd_model):
shared.sd_model.network_layer_mapping = network_layer_mapping
-def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> network.Network | None:
+def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
name = name.replace(".", "_")
shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" detected={network_on_disk.sd_version} method=diffusers scale={lora_scale} fuse={shared.opts.lora_fuse_diffusers}')
if not shared.native:
@@ -103,7 +104,7 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
return net
-def load_network(name, network_on_disk) -> network.Network | None:
+def load_network(name, network_on_disk) -> Union[network.Network, None]:
if not shared.sd_loaded:
return None
@@ -173,6 +174,7 @@ def load_network(name, network_on_disk) -> network.Network | None:
net.bundle_embeddings = bundle_embeddings
return net
+
def maybe_recompile_model(names, te_multipliers):
recompile_model = False
if shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled:
@@ -186,7 +188,7 @@ def maybe_recompile_model(names, te_multipliers):
if not recompile_model:
if len(loaded_networks) > 0 and debug:
shared.log.debug('Model Compile: Skipping LoRa loading')
- return
+ return recompile_model
else:
recompile_model = True
shared.compiled_model_state.lora_model = []
@@ -277,6 +279,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
t1 = time.time()
timer['load'] += t1 - t0
+
def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -389,6 +392,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
t1 = time.time()
timer['apply'] += t1 - t0
+
def network_load():
    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
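Several of the lint fixes above replace PEP 604 unions (`network.Network | None`) with `typing.Union`, which also parses on Python 3.9 and earlier. A minimal illustration of the two equivalent spellings; `Network` here is a stand-in class:

```python
from typing import Optional, Union

class Network:                                   # stand-in for modules.lora.network.Network
    pass

def load_a(name: str) -> Union[Network, None]:   # spelling used by this patch, works on 3.9
    return Network() if name else None

def load_b(name: str) -> Optional[Network]:      # equivalent shorthand
    return Network() if name else None
```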
From 6aa7a4707ef93a993a043d134811ccb3321a6f7f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 09:39:38 -0500
Subject: [PATCH 040/162] modules.lora full integration
Signed-off-by: Vladimir Mandic
---
.../Lora/scripts/lora_script.py | 1 +
modules/api/api.py | 2 +
modules/api/endpoints.py | 10 +++
modules/extensions.py | 2 +-
modules/extra_networks.py | 7 ++-
modules/hashes.py | 7 +++
modules/infotext.py | 35 +++++++++++
modules/loader.py | 2 +-
modules/lora/networks.py | 25 --------
modules/sd_checkpoint.py | 6 ++
modules/shared.py | 5 +-
modules/ui_extra_networks.py | 10 +--
modules/{lora => }/ui_extra_networks_lora.py | 0
modules/ui_models.py | 6 +-
scripts/lora_script.py | 62 -------------------
webui.py | 19 +++++-
16 files changed, 98 insertions(+), 101 deletions(-)
rename modules/{lora => }/ui_extra_networks_lora.py (100%)
delete mode 100644 scripts/lora_script.py
diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py
index dea2985b3..24723dd7f 100644
--- a/extensions-builtin/Lora/scripts/lora_script.py
+++ b/extensions-builtin/Lora/scripts/lora_script.py
@@ -56,6 +56,7 @@ def network_replacement(m):
hashes = {x[0].strip().replace(",", ""): x[1].strip() for x in hashes}
d["Prompt"] = re.sub(re_lora, network_replacement, d["Prompt"])
+
if not shared.native:
script_callbacks.on_app_started(api_networks)
script_callbacks.on_before_ui(before_ui)
diff --git a/modules/api/api.py b/modules/api/api.py
index d48cbf521..7d2c2f279 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -79,6 +79,7 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/sd-vae", endpoints.get_sd_vaes, methods=["GET"], response_model=List[models.ItemVae])
self.add_api_route("/sdapi/v1/extensions", endpoints.get_extensions_list, methods=["GET"], response_model=List[models.ItemExtension])
self.add_api_route("/sdapi/v1/extra-networks", endpoints.get_extra_networks, methods=["GET"], response_model=List[models.ItemExtraNetwork])
+ self.add_api_route("/sdapi/v1/loras", endpoints.get_loras, methods=["GET"], response_model=List[dict])
# functional api
self.add_api_route("/sdapi/v1/png-info", endpoints.post_pnginfo, methods=["POST"], response_model=models.ResImageInfo)
@@ -88,6 +89,7 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/unload-checkpoint", endpoints.post_unload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/reload-checkpoint", endpoints.post_reload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/refresh-vae", endpoints.post_refresh_vae, methods=["POST"])
+ self.add_api_route("/sdapi/v1/refresh-loras", endpoints.post_refresh_loras, methods=["POST"])
self.add_api_route("/sdapi/v1/history", endpoints.get_history, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/history", endpoints.post_history, methods=["POST"], response_model=int)
diff --git a/modules/api/endpoints.py b/modules/api/endpoints.py
index 61993db84..1c56b7171 100644
--- a/modules/api/endpoints.py
+++ b/modules/api/endpoints.py
@@ -40,6 +40,12 @@ def convert_embeddings(embeddings):
return {"loaded": convert_embeddings(db.word_embeddings), "skipped": convert_embeddings(db.skipped_embeddings)}
+def get_loras():
+ from modules.lora import network, networks
+ def create_lora_json(obj: network.NetworkOnDisk):
+ return { "name": obj.name, "alias": obj.alias, "path": obj.filename, "metadata": obj.metadata }
+ return [create_lora_json(obj) for obj in networks.available_networks.values()]
+
def get_extra_networks(page: Optional[str] = None, name: Optional[str] = None, filename: Optional[str] = None, title: Optional[str] = None, fullname: Optional[str] = None, hash: Optional[str] = None): # pylint: disable=redefined-builtin
res = []
for pg in shared.extra_networks:
@@ -126,6 +132,10 @@ def post_refresh_checkpoints():
def post_refresh_vae():
return shared.refresh_vaes()
+def post_refresh_loras():
+ from modules.lora import networks
+ return networks.list_available_networks()
+
def get_extensions_list():
from modules import extensions
extensions.list_extensions()
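For the two routes registered above, a small usage sketch assuming a local SD.Next instance on the default port and the third-party `requests` package; the URL and port are assumptions, only the routes come from the patch:

```python
import requests

base = 'http://127.0.0.1:7860'
requests.post(f'{base}/sdapi/v1/refresh-loras', timeout=60)        # rescan available lora files
loras = requests.get(f'{base}/sdapi/v1/loras', timeout=60).json()  # list of {name, alias, path, metadata}
for item in loras:
    print(item['name'], item['path'])
```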
diff --git a/modules/extensions.py b/modules/extensions.py
index 5a8a53d29..ccd92dbf0 100644
--- a/modules/extensions.py
+++ b/modules/extensions.py
@@ -154,4 +154,4 @@ def list_extensions():
for dirname, path, is_builtin in extension_paths:
extension = Extension(name=dirname, path=path, enabled=dirname not in disabled_extensions, is_builtin=is_builtin)
extensions.append(extension)
- shared.log.info(f'Disabled extensions: {[e.name for e in extensions if not e.enabled]}')
+ shared.log.debug(f'Disabled extensions: {[e.name for e in extensions if not e.enabled]}')
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index b464bd349..010157af9 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -15,10 +15,13 @@ def register_extra_network(extra_network):
def register_default_extra_networks():
- from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet
- register_extra_network(ExtraNetworkHypernet())
from modules.ui_extra_networks_styles import ExtraNetworkStyles
register_extra_network(ExtraNetworkStyles())
+ from modules.lora.extra_networks_lora import ExtraNetworkLora
+ register_extra_network(ExtraNetworkLora())
+ if shared.opts.hypernetwork_enabled:
+ from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet
+ register_extra_network(ExtraNetworkHypernet())
class ExtraNetworkParams:
diff --git a/modules/hashes.py b/modules/hashes.py
index cf83794b0..a003f4840 100644
--- a/modules/hashes.py
+++ b/modules/hashes.py
@@ -9,6 +9,13 @@
cache_data = None
progress_ok = True
+
+def init_cache():
+ global cache_data # pylint: disable=global-statement
+ if cache_data is None:
+ cache_data = {} if not os.path.isfile(cache_filename) else shared.readfile(cache_filename, lock=True)
+
+
def dump_cache():
shared.writefile(cache_data, cache_filename)
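init_cache() is a lazy singleton: the hashes cache file is read from disk at most once, and an empty dict is used when the file does not exist. A rough standalone sketch of the same pattern, with the file name and JSON reader chosen here for illustration:

    import json
    import os

    cache_data = None  # module-level singleton, populated on first access

    def init_cache(cache_filename='cache.json'):
        global cache_data
        if cache_data is None:  # touch the disk only once
            if os.path.isfile(cache_filename):
                with open(cache_filename, 'r', encoding='utf-8') as f:
                    cache_data = json.load(f)
            else:
                cache_data = {}
        return cache_data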
diff --git a/modules/infotext.py b/modules/infotext.py
index 05e06e600..4b9dd15ff 100644
--- a/modules/infotext.py
+++ b/modules/infotext.py
@@ -10,6 +10,7 @@
debug = lambda *args, **kwargs: None # pylint: disable=unnecessary-lambda-assignment
re_size = re.compile(r"^(\d+)x(\d+)$") # int x int
re_param = re.compile(r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)') # multi-word: value
+re_lora = re.compile("")
- if added:
- params["Prompt"] += "\n" + "".join(added)
-
-
-list_available_networks()
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index e035fc3db..a95ade0b1 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -275,6 +275,12 @@ def select_checkpoint(op='model'):
return checkpoint_info
+def init_metadata():
+ global sd_metadata # pylint: disable=global-statement
+ if sd_metadata is None:
+ sd_metadata = shared.readfile(sd_metadata_file, lock=True) if os.path.isfile(sd_metadata_file) else {}
+
+
def read_metadata_from_safetensors(filename):
global sd_metadata # pylint: disable=global-statement
if sd_metadata is None:
diff --git a/modules/shared.py b/modules/shared.py
index 72af37500..e213997c7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -132,7 +132,8 @@ def readfile(filename, silent=False, lock=False):
# data = json.loads(data)
t1 = time.time()
if not silent:
- log.debug(f'Read: file="{filename}" json={len(data)} bytes={os.path.getsize(filename)} time={t1-t0:.3f}')
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+ log.debug(f'Read: file="{filename}" json={len(data)} bytes={os.path.getsize(filename)} time={t1-t0:.3f} fn={fn}')
except FileNotFoundError as err:
log.debug(f'Reading failed: {filename} {err}')
except Exception as err:
@@ -363,7 +364,7 @@ def list_samplers():
def temp_disable_extensions():
disable_safe = ['sd-webui-controlnet', 'multidiffusion-upscaler-for-automatic1111', 'a1111-sd-webui-lycoris', 'sd-webui-agent-scheduler', 'clip-interrogator-ext', 'stable-diffusion-webui-rembg', 'sd-extension-chainner', 'stable-diffusion-webui-images-browser']
- disable_diffusers = ['sd-webui-controlnet', 'multidiffusion-upscaler-for-automatic1111', 'a1111-sd-webui-lycoris', 'sd-webui-animatediff']
+ disable_diffusers = ['sd-webui-controlnet', 'multidiffusion-upscaler-for-automatic1111', 'a1111-sd-webui-lycoris', 'sd-webui-animatediff', 'Lora']
disable_themes = ['sd-webui-lobe-theme', 'cozy-nest', 'sdnext-modernui']
disable_original = []
disabled = []
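The readfile() hunk above tags each read with the name of the calling function by walking two frames up the stack via sys._getframe. A minimal sketch of that trick using only the standard library (CPython-specific):

    import sys

    def readfile_stub(filename):
        # frame 1 is the direct caller, frame 2 the caller's caller
        fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}'  # pylint: disable=protected-access
        print(f'Read: file="{filename}" fn={fn}')

    def load_settings():
        readfile_stub('config.json')

    def main():
        load_settings()

    main()  # prints: Read: file="config.json" fn=main:load_settings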
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index c326219df..898522366 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -460,17 +460,19 @@ def register_page(page: ExtraNetworksPage):
def register_pages():
- from modules.ui_extra_networks_textual_inversion import ExtraNetworksPageTextualInversion
from modules.ui_extra_networks_checkpoints import ExtraNetworksPageCheckpoints
- from modules.ui_extra_networks_styles import ExtraNetworksPageStyles
+ from modules.ui_extra_networks_lora import ExtraNetworksPageLora
from modules.ui_extra_networks_vae import ExtraNetworksPageVAEs
+ from modules.ui_extra_networks_styles import ExtraNetworksPageStyles
from modules.ui_extra_networks_history import ExtraNetworksPageHistory
+ from modules.ui_extra_networks_textual_inversion import ExtraNetworksPageTextualInversion
debug('EN register-pages')
register_page(ExtraNetworksPageCheckpoints())
- register_page(ExtraNetworksPageStyles())
- register_page(ExtraNetworksPageTextualInversion())
+ register_page(ExtraNetworksPageLora())
register_page(ExtraNetworksPageVAEs())
+ register_page(ExtraNetworksPageStyles())
register_page(ExtraNetworksPageHistory())
+ register_page(ExtraNetworksPageTextualInversion())
if shared.opts.hypernetwork_enabled:
from modules.ui_extra_networks_hypernets import ExtraNetworksPageHypernetworks
register_page(ExtraNetworksPageHypernetworks())
diff --git a/modules/lora/ui_extra_networks_lora.py b/modules/ui_extra_networks_lora.py
similarity index 100%
rename from modules/lora/ui_extra_networks_lora.py
rename to modules/ui_extra_networks_lora.py
diff --git a/modules/ui_models.py b/modules/ui_models.py
index 624c3849d..7ab8b0d07 100644
--- a/modules/ui_models.py
+++ b/modules/ui_models.py
@@ -8,7 +8,7 @@
from modules.ui_components import ToolButton
from modules.ui_common import create_refresh_button
from modules.call_queue import wrap_gradio_gpu_call
-from modules.shared import opts, log, req, readfile, max_workers
+from modules.shared import opts, log, req, readfile, max_workers, native
import modules.ui_symbols
import modules.errors
import modules.hashes
@@ -794,6 +794,10 @@ def civit_update_download():
civit_results4.select(fn=civit_update_select, inputs=[civit_results4], outputs=[models_outcome, civit_update_download_btn])
civit_update_download_btn.click(fn=civit_update_download, inputs=[], outputs=[models_outcome])
+ if native:
+ from modules.lora.lora_extract import create_ui as lora_extract_ui
+ lora_extract_ui()
+
for ui in extra_ui:
if callable(ui):
ui()
diff --git a/scripts/lora_script.py b/scripts/lora_script.py
deleted file mode 100644
index a153a2caa..000000000
--- a/scripts/lora_script.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import re
-import modules.lora.networks as networks
-from modules.lora.lora_extract import create_ui
-from modules.lora.network import NetworkOnDisk
-from modules.lora.ui_extra_networks_lora import ExtraNetworksPageLora
-from modules.lora.extra_networks_lora import ExtraNetworkLora
-from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models, shared # pylint: disable=unused-import
-
-
-re_lora = re.compile("
Date: Fri, 29 Nov 2024 10:05:06 -0500
Subject: [PATCH 041/162] conditional imports and summary timer
Signed-off-by: Vladimir Mandic
---
modules/api/api.py | 7 +++++--
modules/infotext.py | 1 +
modules/lora/networks.py | 8 +++++++-
modules/processing_callbacks.py | 5 ++++-
modules/processing_diffusers.py | 11 ++++++-----
modules/ui_extra_networks.py | 7 ++++---
webui.py | 7 ++++---
7 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/modules/api/api.py b/modules/api/api.py
index 7d2c2f279..b958085ea 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -79,7 +79,6 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/sd-vae", endpoints.get_sd_vaes, methods=["GET"], response_model=List[models.ItemVae])
self.add_api_route("/sdapi/v1/extensions", endpoints.get_extensions_list, methods=["GET"], response_model=List[models.ItemExtension])
self.add_api_route("/sdapi/v1/extra-networks", endpoints.get_extra_networks, methods=["GET"], response_model=List[models.ItemExtraNetwork])
- self.add_api_route("/sdapi/v1/loras", endpoints.get_loras, methods=["GET"], response_model=List[dict])
# functional api
self.add_api_route("/sdapi/v1/png-info", endpoints.post_pnginfo, methods=["POST"], response_model=models.ResImageInfo)
@@ -89,10 +88,14 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/unload-checkpoint", endpoints.post_unload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/reload-checkpoint", endpoints.post_reload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/refresh-vae", endpoints.post_refresh_vae, methods=["POST"])
- self.add_api_route("/sdapi/v1/refresh-loras", endpoints.post_refresh_loras, methods=["POST"])
self.add_api_route("/sdapi/v1/history", endpoints.get_history, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/history", endpoints.post_history, methods=["POST"], response_model=int)
+ # lora api
+ if shared.native:
+ self.add_api_route("/sdapi/v1/loras", endpoints.get_loras, methods=["GET"], response_model=List[dict])
+ self.add_api_route("/sdapi/v1/refresh-loras", endpoints.post_refresh_loras, methods=["POST"])
+
# gallery api
gallery.register_api(app)
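The LoRA endpoints are now registered only when the diffusers backend (shared.native) is active, so on the legacy backend the paths simply do not exist and return 404. A stripped-down sketch of the same gating; the flag and handler below are placeholders, only add_api_route and the route path mirror the patch:

    from typing import List
    from fastapi import FastAPI

    NATIVE = True  # stand-in for shared.native

    app = FastAPI()

    def get_loras() -> List[dict]:
        return [{'name': 'example', 'path': '/models/Lora/example.safetensors'}]

    if NATIVE:
        # the route exists only when the flag is set; otherwise clients get a plain 404
        app.add_api_route('/sdapi/v1/loras', get_loras, methods=['GET'], response_model=List[dict])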
diff --git a/modules/infotext.py b/modules/infotext.py
index 4b9dd15ff..baa995c88 100644
--- a/modules/infotext.py
+++ b/modules/infotext.py
@@ -28,6 +28,7 @@ def unquote(text):
return text
+# disabled by default can be enabled if needed
def check_lora(params):
try:
import modules.lora.networks as networks
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index dc6d86b2f..2db145a5a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -17,7 +17,7 @@
import modules.lora.network_glora as network_glora
import modules.lora.network_overrides as network_overrides
import modules.lora.lora_convert as lora_convert
-from modules import shared, devices, sd_models, sd_models_compile, errors, scripts, files_cache, model_quant
+from modules import shared, devices, sd_models, sd_models_compile, errors, files_cache, model_quant
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
@@ -44,6 +44,10 @@
]
+def total_time():
+ return sum(timer.values())
+
+
def assign_network_names_to_compvis_modules(sd_model):
if sd_model is None:
return
@@ -394,6 +398,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
def network_load():
+ for k in timer.keys():
+ timer[k] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
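network_load() now zeroes the shared timer dict before each apply pass and total_time() reports the sum across phases. A tiny self-contained sketch of that accounting pattern:

    import time

    timer = {'load': 0.0, 'backup': 0.0, 'calc': 0.0, 'apply': 0.0}

    def total_time():
        return sum(timer.values())

    def reset_timers():
        for k in timer.keys():
            timer[k] = 0.0

    def timed_phase(name, fn, *args, **kwargs):
        # accumulate wall-clock time per named phase
        t0 = time.time()
        result = fn(*args, **kwargs)
        timer[name] += time.time() - t0
        return result

    reset_timers()
    timed_phase('apply', time.sleep, 0.01)
    print(f'total={total_time():.3f} timers={timer}')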
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 0bb94abad..e1bf723cc 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -6,6 +6,7 @@
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
from modules.lora.networks import network_load
+
p = None
debug = os.environ.get('SD_CALLBACK_DEBUG', None) is not None
debug_callback = shared.log.trace if debug else lambda *args, **kwargs: None
@@ -15,6 +16,7 @@ def set_callbacks_p(processing):
global p # pylint: disable=global-statement
p = processing
+
def prompt_callback(step, kwargs):
if prompt_parser_diffusers.embedder is None or 'prompt_embeds' not in kwargs:
return kwargs
@@ -29,6 +31,7 @@ def prompt_callback(step, kwargs):
debug_callback(f"Callback: {e}")
return kwargs
+
def diffusers_callback_legacy(step: int, timestep: int, latents: typing.Union[torch.FloatTensor, np.ndarray]):
if p is None:
return
@@ -64,7 +67,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
if shared.state.interrupted or shared.state.skipped:
raise AssertionError('Interrupted...')
time.sleep(0.1)
- if hasattr(p, "stepwise_lora"):
+ if hasattr(p, "stepwise_lora") and shared.native:
extra_networks.activate(p, p.extra_network_data, step=step)
network_load()
if latents is None:
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 2e8fb357c..ae24f5f80 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -8,8 +8,7 @@
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
-from modules.lora.networks import network_load
-from modules.lora.networks import timer as network_timer
+from modules.lora import networks
debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -427,9 +426,9 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
p.prompts = p.all_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
if p.negative_prompts is None or len(p.negative_prompts) == 0:
p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
- network_timer['apply'] = 0
- network_timer['restore'] = 0
- network_load()
+
+ # load loras
+ networks.network_load()
sd_models.move_model(shared.sd_model, devices.device)
sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
@@ -459,6 +458,8 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
results = process_decode(p, output)
timer.process.record('decode')
+ timer.process.add('lora', networks.total_time())
+
shared.sd_model = orig_pipeline
if p.state == '':
global last_p # pylint: disable=global-statement
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index 898522366..94664c5cb 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -460,19 +460,20 @@ def register_page(page: ExtraNetworksPage):
def register_pages():
+ debug('EN register-pages')
from modules.ui_extra_networks_checkpoints import ExtraNetworksPageCheckpoints
- from modules.ui_extra_networks_lora import ExtraNetworksPageLora
from modules.ui_extra_networks_vae import ExtraNetworksPageVAEs
from modules.ui_extra_networks_styles import ExtraNetworksPageStyles
from modules.ui_extra_networks_history import ExtraNetworksPageHistory
from modules.ui_extra_networks_textual_inversion import ExtraNetworksPageTextualInversion
- debug('EN register-pages')
register_page(ExtraNetworksPageCheckpoints())
- register_page(ExtraNetworksPageLora())
register_page(ExtraNetworksPageVAEs())
register_page(ExtraNetworksPageStyles())
register_page(ExtraNetworksPageHistory())
register_page(ExtraNetworksPageTextualInversion())
+ if shared.native:
+ from modules.ui_extra_networks_lora import ExtraNetworksPageLora
+ register_page(ExtraNetworksPageLora())
if shared.opts.hypernetwork_enabled:
from modules.ui_extra_networks_hypernets import ExtraNetworksPageHypernetworks
register_page(ExtraNetworksPageHypernetworks())
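Moving the ExtraNetworksPageLora import inside the shared.native branch means the LoRA UI module is never imported on the legacy backend. A small sketch of the deferred-import idea; the module name below is a stand-in, not the real page module:

    import importlib

    def register_optional_page(module_name: str, enabled: bool):
        # pay the import cost only when the feature is actually enabled
        if not enabled:
            return None
        return importlib.import_module(module_name)

    # 'json' stands in for a real page module such as modules.ui_extra_networks_lora
    print(register_optional_page('json', enabled=True))
    print(register_optional_page('json', enabled=False))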
diff --git a/webui.py b/webui.py
index 3aae34447..2b8d7c56f 100644
--- a/webui.py
+++ b/webui.py
@@ -34,7 +34,6 @@
import modules.upscaler
import modules.textual_inversion.textual_inversion
import modules.hypernetworks.hypernetwork
-import modules.lora.networks
import modules.script_callbacks
from modules.api.middleware import setup_middleware
from modules.shared import cmd_opts, opts # pylint: disable=unused-import
@@ -104,8 +103,10 @@ def initialize():
modules.sd_models.setup_model()
timer.startup.record("models")
- modules.lora.networks.list_available_networks()
- timer.startup.record("lora")
+ if shared.native:
+ import modules.lora.networks as lora_networks
+ lora_networks.list_available_networks()
+ timer.startup.record("lora")
shared.prompt_styles.reload()
timer.startup.record("styles")
From 493c953d49788a6ad50363b766324fda5a943042 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 10:22:47 -0500
Subject: [PATCH 042/162] cleanup
Signed-off-by: Vladimir Mandic
---
modules/modelloader.py | 4 ++--
modules/sd_checkpoint.py | 6 +++++-
modules/ui_extra_networks_lora.py | 2 +-
webui.py | 6 +++---
4 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/modules/modelloader.py b/modules/modelloader.py
index b1b3930d6..b022b4fc6 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -267,7 +267,7 @@ def download_diffusers_model(hub_id: str, cache_dir: str = None, download_config
def load_diffusers_models(clear=True):
excluded_models = []
- t0 = time.time()
+ # t0 = time.time()
place = shared.opts.diffusers_dir
if place is None or len(place) == 0 or not os.path.isdir(place):
place = os.path.join(models_path, 'Diffusers')
@@ -316,7 +316,7 @@ def load_diffusers_models(clear=True):
debug(f'Error analyzing diffusers model: "{folder}" {e}')
except Exception as e:
shared.log.error(f"Error listing diffusers: {place} {e}")
- shared.log.debug(f'Scanning diffusers cache: folder="{place}" items={len(list(diffuser_repos))} time={time.time()-t0:.2f}')
+ # shared.log.debug(f'Scanning diffusers cache: folder="{place}" items={len(list(diffuser_repos))} time={time.time()-t0:.2f}')
return diffuser_repos
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index a95ade0b1..2f6533ef0 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -123,13 +123,17 @@ def list_models():
checkpoint_aliases.clear()
ext_filter = [".safetensors"] if shared.opts.sd_disable_ckpt or shared.native else [".ckpt", ".safetensors"]
model_list = list(modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"]))
+ safetensors_list = []
for filename in sorted(model_list, key=str.lower):
checkpoint_info = CheckpointInfo(filename)
+ safetensors_list.append(checkpoint_info)
if checkpoint_info.name is not None:
checkpoint_info.register()
+ diffusers_list = []
if shared.native:
for repo in modelloader.load_diffusers_models(clear=True):
checkpoint_info = CheckpointInfo(repo['name'], sha=repo['hash'])
+ diffusers_list.append(checkpoint_info)
if checkpoint_info.name is not None:
checkpoint_info.register()
if shared.cmd_opts.ckpt is not None:
@@ -143,7 +147,7 @@ def list_models():
shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title
elif shared.cmd_opts.ckpt != shared.default_sd_model_file and shared.cmd_opts.ckpt is not None:
shared.log.warning(f'Load model: path="{shared.cmd_opts.ckpt}" not found')
- shared.log.info(f'Available Models: path="{shared.opts.ckpt_dir}" items={len(checkpoints_list)} time={time.time()-t0:.2f}')
+ shared.log.info(f'Available Models: items={len(checkpoints_list)} safetensors="{shared.opts.ckpt_dir}":{len(safetensors_list)} diffusers="{shared.opts.diffusers_dir}":{len(diffusers_list)} time={time.time()-t0:.2f}')
checkpoints_list = dict(sorted(checkpoints_list.items(), key=lambda cp: cp[1].filename))
def update_model_hashes():
diff --git a/modules/ui_extra_networks_lora.py b/modules/ui_extra_networks_lora.py
index 73cce47a3..9dd1b3573 100644
--- a/modules/ui_extra_networks_lora.py
+++ b/modules/ui_extra_networks_lora.py
@@ -120,4 +120,4 @@ def list_items(self):
return items
def allowed_directories_for_previews(self):
- return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir]
+ return [shared.cmd_opts.lora_dir]
diff --git a/webui.py b/webui.py
index 2b8d7c56f..4eb6e89ce 100644
--- a/webui.py
+++ b/webui.py
@@ -96,9 +96,6 @@ def initialize():
modules.model_te.refresh_te_list()
timer.startup.record("te")
- extensions.list_extensions()
- timer.startup.record("extensions")
-
modelloader.cleanup_models()
modules.sd_models.setup_model()
timer.startup.record("models")
@@ -120,6 +117,9 @@ def initialize():
yolo.initialize()
timer.startup.record("detailer")
+ extensions.list_extensions()
+ timer.startup.record("extensions")
+
log.info('Load extensions')
t_timer, t_total = modules.scripts.load_scripts()
timer.startup.record("extensions")
From 39b14a202e93f621ac16216b2471f96e1d5b6d48 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 29 Nov 2024 22:35:00 +0300
Subject: [PATCH 043/162] Fix sequential offload with lora
---
modules/lora/networks.py | 8 +++++---
modules/sd_models.py | 14 +++++++++-----
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 2db145a5a..8a23f7413 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -278,8 +278,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
shared.compiled_model_state.lora_model = backup_lora_model
- if shared.opts.diffusers_offload_mode == "balanced":
- sd_models.apply_balanced_offload(shared.sd_model)
t1 = time.time()
timer['load'] += t1 - t0
@@ -401,12 +399,16 @@ def network_load():
for k in timer.keys():
timer[k] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.disable_offload(sd_model)
+ sd_models.move_model(sd_model, device=devices.cpu)
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
if component is not None:
for _, module in component.named_modules():
network_apply_weights(module)
-
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.set_diffuser_offload(sd_model, op="model")
def list_available_networks():
t0 = time.time()
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 68446bdd3..361f6375b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -405,7 +405,7 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "_internal_dict"):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
- keys = get_signature(shared.sd_model).keys()
+ keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
@@ -1448,10 +1448,14 @@ def disable_offload(sd_model):
from accelerate.hooks import remove_hook_from_module
if not getattr(sd_model, 'has_accelerate', False):
return
- if hasattr(sd_model, 'components'):
- for _name, model in sd_model.components.items():
- if isinstance(model, torch.nn.Module):
- remove_hook_from_module(model, recurse=True)
+ if hasattr(sd_model, "_internal_dict"):
+ keys = sd_model._internal_dict.keys() # pylint: disable=protected-access
+ else:
+ keys = get_signature(sd_model).keys()
+ for module_name in keys: # pylint: disable=protected-access
+ module = getattr(sd_model, module_name, None)
+ if isinstance(module, torch.nn.Module):
+ module = remove_hook_from_module(module, recurse=True)
sd_model.has_accelerate = False
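disable_offload() now walks the pipeline's _internal_dict keys (falling back to the constructor signature) instead of .components, so accelerate hooks are removed even from modules that components does not expose. A rough sketch of that loop; the signature-based fallback is simplified here to vars():

    import torch
    from accelerate.hooks import remove_hook_from_module

    def disable_offload_sketch(pipe):
        # prefer the pipeline's registered component names, fall back to plain attributes
        keys = pipe._internal_dict.keys() if hasattr(pipe, '_internal_dict') else vars(pipe).keys()
        for name in list(keys):
            module = getattr(pipe, name, None)
            if isinstance(module, torch.nn.Module):
                # strip accelerate offload hooks so weights can be touched directly
                remove_hook_from_module(module, recurse=True)
        pipe.has_accelerate = False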
From 9187418358d6991fc34ee9a10a9e53340eda1e1d Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 29 Nov 2024 22:53:17 +0300
Subject: [PATCH 044/162] revert networks.py
---
modules/lora/networks.py | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 8a23f7413..2db145a5a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -278,6 +278,8 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
shared.compiled_model_state.lora_model = backup_lora_model
+ if shared.opts.diffusers_offload_mode == "balanced":
+ sd_models.apply_balanced_offload(shared.sd_model)
t1 = time.time()
timer['load'] += t1 - t0
@@ -399,16 +401,12 @@ def network_load():
for k in timer.keys():
timer[k] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
- if shared.opts.diffusers_offload_mode == "sequential":
- sd_models.disable_offload(sd_model)
- sd_models.move_model(sd_model, device=devices.cpu)
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
if component is not None:
for _, module in component.named_modules():
network_apply_weights(module)
- if shared.opts.diffusers_offload_mode == "sequential":
- sd_models.set_diffuser_offload(sd_model, op="model")
+
def list_available_networks():
t0 = time.time()
From e52019104d822c156a5b7dfb9c8a734bd897a4a3 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 29 Nov 2024 22:55:15 +0300
Subject: [PATCH 045/162] revert sd_models.py
---
modules/sd_models.py | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 361f6375b..68446bdd3 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -405,7 +405,7 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "_internal_dict"):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
- keys = get_signature(pipe).keys()
+ keys = get_signature(shared.sd_model).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
@@ -1448,14 +1448,10 @@ def disable_offload(sd_model):
from accelerate.hooks import remove_hook_from_module
if not getattr(sd_model, 'has_accelerate', False):
return
- if hasattr(sd_model, "_internal_dict"):
- keys = sd_model._internal_dict.keys() # pylint: disable=protected-access
- else:
- keys = get_signature(sd_model).keys()
- for module_name in keys: # pylint: disable=protected-access
- module = getattr(sd_model, module_name, None)
- if isinstance(module, torch.nn.Module):
- module = remove_hook_from_module(module, recurse=True)
+ if hasattr(sd_model, 'components'):
+ for _name, model in sd_model.components.items():
+ if isinstance(model, torch.nn.Module):
+ remove_hook_from_module(model, recurse=True)
sd_model.has_accelerate = False
From ea994a881e33f911c6556bdd38cb9cd2587e2e64 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 15:40:09 -0500
Subject: [PATCH 046/162] lora stats
Signed-off-by: Vladimir Mandic
---
installer.py | 2 +-
scripts/flux_tools.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/installer.py b/installer.py
index 37202552d..8fb6d9683 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '069186fac510d6f6f88a5e435523b235c823a8a0'
+ sha = 'c96bfa5c80eca798d555a79a491043c311d0f608'
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
index e5fe443b7..3fbab6c6f 100644
--- a/scripts/flux_tools.py
+++ b/scripts/flux_tools.py
@@ -100,7 +100,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
if tool == 'Depth':
# pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
- install('git+https://github.com/asomoza/image_gen_aux.git', 'image_gen_aux')
+ install('git+https://github.com/huggingface/image_gen_aux.git', 'image_gen_aux')
if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint:
shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev"
sd_models.reload_model_weights(op='model', revision="refs/pr/1")
From 797ad1f20f33b90c06380bedbcf7da1474cc90a7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 15:40:20 -0500
Subject: [PATCH 047/162] lora stats
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 69 +++++++++++++++++++++++++++-------------
modules/model_flux.py | 8 ++---
2 files changed, 51 insertions(+), 26 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 2db145a5a..beb4634c2 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -5,6 +5,7 @@
import concurrent
import torch
import diffusers.models.lora
+import rich.progress as p
import modules.lora.network as network
import modules.lora.network_lora as network_lora
@@ -21,11 +22,12 @@
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+pbar = p.Progress(p.TextColumn('[cyan]LoRA apply'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), p.TextColumn('[cyan]{task.description}'), console=shared.console)
extra_network_lora = None
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
-timer = { 'load': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
lora_cache = {}
diffuser_loaded = []
diffuser_scales = []
@@ -216,7 +218,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
loaded_networks.clear()
diffuser_loaded.clear()
diffuser_scales.clear()
- timer['load'] = 0
t0 = time.time()
for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)):
@@ -269,8 +270,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
if len(loaded_networks) > 0 and debug:
shared.log.debug(f'Load network: type=LoRA loaded={len(loaded_networks)} cache={list(lora_cache)}')
- devices.torch_gc()
-
if recompile_model:
shared.log.info("Load network: type=LoRA recompiling model")
backup_lora_model = shared.compiled_model_state.lora_model
@@ -278,13 +277,18 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
shared.compiled_model_state.lora_model = backup_lora_model
- if shared.opts.diffusers_offload_mode == "balanced":
- sd_models.apply_balanced_offload(shared.sd_model)
+
+ if len(loaded_networks) > 0:
+ devices.torch_gc()
+ if shared.opts.diffusers_offload_mode == "balanced":
+ sd_models.apply_balanced_offload(shared.sd_model)
+
t1 = time.time()
- timer['load'] += t1 - t0
+ timer['load'] = t1 - t0
def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+ t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
@@ -315,9 +319,12 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
else:
self.bias = None
self.to(device)
+ t1 = time.time()
+ timer['apply'] += t1 - t0
def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
+ t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
@@ -344,6 +351,8 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and bias_backup is not None:
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
+ t1 = time.time()
+ timer['backup'] += t1 - t0
def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv]):
@@ -353,16 +362,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
If not, restores original weights from backup and alters weights according to networks.
"""
network_layer_name = getattr(self, 'network_layer_name', None)
- if network_layer_name is None:
- return
- t0 = time.time()
current_names = getattr(self, "network_current_names", ())
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
- if any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
- maybe_backup_weights(self, wanted_names)
+ maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
batch_updown = None
batch_ex_bias = None
+ t0 = time.time()
for net in loaded_networks:
# default workflow where module is known and has weights
module = net.modules.get(network_layer_name, None)
@@ -391,21 +397,39 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ t1 = time.time()
+ timer['calc'] += t1 - t0
set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
- t1 = time.time()
- timer['apply'] += t1 - t0
-def network_load():
- for k in timer.keys():
- timer[k] = 0
+def network_load(): # called from processing
+ timer['backup'] = 0
+ timer['calc'] = 0
+ timer['apply'] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
- for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
- component = getattr(sd_model, component_name, None)
- if component is not None:
- for _, module in component.named_modules():
- network_apply_weights(module)
+ with pbar:
+ for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None:
+ applied = 0
+ modules = list(component.named_modules())
+ task_start = time.time()
+ task = pbar.add_task(description=component_name , total=len(modules), visible=False)
+ for _, module in modules:
+ layer_name = getattr(module, 'network_layer_name', None)
+ if layer_name is None:
+ continue
+ present = any([net.modules.get(layer_name, None) for net in loaded_networks]) # noqa: C419
+ if present:
+ network_apply_weights(module)
+ applied += 1
+ pbar.update(task, advance=1, visible=(time.time() - task_start) > 1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.remove_task(task)
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA component={component_name} modules={len(modules)} applied={applied}')
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA total={total_time():.2f} timers={timer}')
def list_available_networks():
@@ -442,4 +466,5 @@ def add_network(filename):
for fn in candidates:
executor.submit(add_network, fn)
t1 = time.time()
+ timer['list'] = t1 - t0
shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
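The apply loop now runs under a rich progress bar whose task starts hidden and only becomes visible once the work has taken more than a second, so fast applies stay silent. A standalone sketch of that pattern with simulated work:

    import time
    import rich.progress as p

    pbar = p.Progress(p.TextColumn('[cyan]LoRA apply'), p.BarColumn(), p.TaskProgressColumn(),
                      p.TimeRemainingColumn(), p.TimeElapsedColumn())

    items = range(50)
    with pbar:
        t_start = time.time()
        task = pbar.add_task(description='apply', total=len(items), visible=False)
        for _ in items:
            time.sleep(0.05)  # stand-in for network_apply_weights(module)
            # flip the bar to visible only once the loop has run for more than 1 second
            pbar.update(task, advance=1, visible=(time.time() - t_start) > 1)
        pbar.remove_task(task)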
diff --git a/modules/model_flux.py b/modules/model_flux.py
index 324e50b36..ce2c55f70 100644
--- a/modules/model_flux.py
+++ b/modules/model_flux.py
@@ -223,10 +223,8 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if shared.opts.sd_unet != 'None':
try:
debug(f'Load model: type=FLUX unet="{shared.opts.sd_unet}"')
- _transformer = load_transformer(sd_unet.unet_dict[shared.opts.sd_unet])
- if _transformer is not None:
- transformer = _transformer
- else:
+ transformer = load_transformer(sd_unet.unet_dict[shared.opts.sd_unet])
+ if transformer is None:
shared.opts.sd_unet = 'None'
sd_unet.failed_unet.append(shared.opts.sd_unet)
except Exception as e:
@@ -334,6 +332,8 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
text_encoder_1 = None
text_encoder_2 = None
vae = None
+ for k in kwargs.keys():
+ kwargs[k] = None
devices.torch_gc()
return pipe
From 881fa1183ca3ca1bda1102026155be2b850a9782 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 00:16:38 +0300
Subject: [PATCH 048/162] Fix offload issues with lora
---
modules/lora/networks.py | 14 +++++++++++---
modules/sd_models.py | 14 +++++++++-----
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index beb4634c2..f211149bd 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -280,8 +280,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
if len(loaded_networks) > 0:
devices.torch_gc()
- if shared.opts.diffusers_offload_mode == "balanced":
- sd_models.apply_balanced_offload(shared.sd_model)
t1 = time.time()
timer['load'] = t1 - t0
@@ -375,7 +373,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if module is not None and hasattr(self, 'weight'):
try:
with devices.inference_context():
- weight = self.weight # calculate quant weights once
+ weight = self.weight.to(devices.device) # calculate quant weights once
updown, ex_bias = module.calc_updown(weight)
if batch_updown is not None and updown is not None:
batch_updown += updown
@@ -385,6 +383,11 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
batch_ex_bias += ex_bias
else:
batch_ex_bias = ex_bias
+ if shared.opts.diffusers_offload_mode != "none":
+ if batch_updown is not None:
+ batch_updown = batch_updown.to(devices.cpu)
+ if batch_ex_bias is not None:
+ batch_ex_bias = batch_ex_bias.to(devices.cpu)
except RuntimeError as e:
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
if debug:
@@ -408,6 +411,9 @@ def network_load(): # called from processing
timer['calc'] = 0
timer['apply'] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatiblility
+ if shared.opts.diffusers_offload_mode != "none":
+ sd_models.disable_offload(sd_model)
+ sd_models.move_model(sd_model, device=devices.cpu)
with pbar:
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
@@ -428,6 +434,8 @@ def network_load(): # called from processing
pbar.remove_task(task)
if debug:
shared.log.debug(f'Load network: type=LoRA component={component_name} modules={len(modules)} applied={applied}')
+ if shared.opts.diffusers_offload_mode != "none":
+ sd_models.set_diffuser_offload(sd_model, op="model")
if debug:
shared.log.debug(f'Load network: type=LoRA total={total_time():.2f} timers={timer}')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 68446bdd3..361f6375b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -405,7 +405,7 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "_internal_dict"):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
- keys = get_signature(shared.sd_model).keys()
+ keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
@@ -1448,10 +1448,14 @@ def disable_offload(sd_model):
from accelerate.hooks import remove_hook_from_module
if not getattr(sd_model, 'has_accelerate', False):
return
- if hasattr(sd_model, 'components'):
- for _name, model in sd_model.components.items():
- if isinstance(model, torch.nn.Module):
- remove_hook_from_module(model, recurse=True)
+ if hasattr(sd_model, "_internal_dict"):
+ keys = sd_model._internal_dict.keys() # pylint: disable=protected-access
+ else:
+ keys = get_signature(sd_model).keys()
+ for module_name in keys: # pylint: disable=protected-access
+ module = getattr(sd_model, module_name, None)
+ if isinstance(module, torch.nn.Module):
+ module = remove_hook_from_module(module, recurse=True)
sd_model.has_accelerate = False
From 369ae52401d1e3d3533862ea90e1c5847b14ad34 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 00:55:22 +0300
Subject: [PATCH 049/162] Update OpenVINO to 2024.5.0
---
CHANGELOG.md | 1 +
installer.py | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 919041bde..08ae05471 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@
style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without input image in which case first prompt is used to establish baseline
*note:* all prompts are processed as a single batch, so vram is the limiting factor
+- **OpenVINO**: update to 2024.5.0
### UI and workflow improvements
diff --git a/installer.py b/installer.py
index 37202552d..ec83cc1c3 100644
--- a/installer.py
+++ b/installer.py
@@ -640,7 +640,7 @@ def install_ipex(torch_command):
# os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.1 intel-extension-for-tensorflow[xpu]==2.15.0.1')
else:
torch_command = os.environ.get('TORCH_COMMAND', '--pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/xpu') # torchvision doesn't exist on test/stable branch for windows
- install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino', ignore=True)
+ install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.5.0'), 'openvino', ignore=True)
install('nncf==2.7.0', 'nncf', ignore=True)
install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True)
return torch_command
@@ -650,7 +650,7 @@ def install_openvino(torch_command):
check_python(supported_minors=[8, 9, 10, 11, 12], reason='OpenVINO backend requires Python 3.9, 3.10 or 3.11')
log.info('OpenVINO: selected')
torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1+cpu torchvision==0.18.1+cpu --index-url https://download.pytorch.org/whl/cpu')
- install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino')
+ install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.5.0'), 'openvino')
install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True)
install('nncf==2.12.0', 'nncf')
os.environ.setdefault('PYTORCH_TRACING_MODE', 'TORCHFX')
From 63ba83d361e37494d8a811ae1c9c77fae3cdc41b Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 01:15:49 +0300
Subject: [PATCH 050/162] ZLUDA enable Dynamic attention by default
---
modules/shared.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/shared.py b/modules/shared.py
index 720819135..11452d7ab 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -449,16 +449,16 @@ def get_default_modes():
default_cross_attention = "Scaled-Dot-Product" if native else "Doggettx's"
elif devices.backend == "mps":
default_cross_attention = "Scaled-Dot-Product" if native else "Doggettx's"
- else: # cuda, rocm, ipex, openvino
- default_cross_attention ="Scaled-Dot-Product"
+ else: # cuda, rocm, zluda, ipex, openvino
+ default_cross_attention = "Scaled-Dot-Product"
if devices.backend == "rocm":
default_sdp_options = ['Memory attention', 'Math attention']
elif devices.backend == "zluda":
- default_sdp_options = ['Math attention']
+ default_sdp_options = ['Math attention', 'Dynamic attention']
else:
default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention']
- if (cmd_opts.lowvram or cmd_opts.medvram) and ('Flash attention' not in default_sdp_options):
+ if (cmd_opts.lowvram or cmd_opts.medvram) and ('Flash attention' not in default_sdp_options and 'Dynamic attention' not in default_sdp_options):
default_sdp_options.append('Dynamic attention')
return default_offload_mode, default_cross_attention, default_sdp_options
From 1e903129824ace3a33c7c04fd059f44ec18c52e8 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 08:50:25 -0500
Subject: [PATCH 051/162] update lora
Signed-off-by: Vladimir Mandic
---
launch.py | 3 +++
modules/cmd_args.py | 1 +
modules/lora/networks.py | 45 ++++++++++++++++++-----------------
modules/processing.py | 9 +++++++
modules/processing_helpers.py | 6 ++++-
5 files changed, 41 insertions(+), 23 deletions(-)
diff --git a/launch.py b/launch.py
index e00da58c7..5c8a6051a 100755
--- a/launch.py
+++ b/launch.py
@@ -192,6 +192,9 @@ def main():
global args # pylint: disable=global-statement
installer.ensure_base_requirements()
init_args() # setup argparser and default folders
+ if args.malloc:
+ import tracemalloc
+ tracemalloc.start()
installer.args = args
installer.setup_logging()
installer.log.info('Starting SD.Next')
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 752ad02c0..cb4e5fc16 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -26,6 +26,7 @@ def main_args():
group_diag.add_argument("--no-hashing", default=os.environ.get("SD_NOHASHING", False), action='store_true', help="Disable hashing of checkpoints, default: %(default)s")
group_diag.add_argument("--no-metadata", default=os.environ.get("SD_NOMETADATA", False), action='store_true', help="Disable reading of metadata from models, default: %(default)s")
group_diag.add_argument("--profile", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Run profiler, default: %(default)s")
+ group_diag.add_argument("--malloc", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Trace memory ops, default: %(default)s")
group_diag.add_argument("--disable-queue", default=os.environ.get("SD_DISABLEQUEUE", False), action='store_true', help="Disable queues, default: %(default)s")
group_diag.add_argument('--debug', default=os.environ.get("SD_DEBUG", False), action='store_true', help = "Run installer with debug logging, default: %(default)s")
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index f211149bd..23c45ff2a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -22,7 +22,7 @@
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
-pbar = p.Progress(p.TextColumn('[cyan]LoRA apply'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), p.TextColumn('[cyan]{task.description}'), console=shared.console)
+pbar = p.Progress(p.TextColumn('[cyan]{task.description}'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), console=shared.console)
extra_network_lora = None
available_networks = {}
available_network_aliases = {}
@@ -50,6 +50,13 @@ def total_time():
return sum(timer.values())
+def get_timers():
+ t = { 'total': round(sum(timer.values()), 2) }
+ for k, v in timer.items():
+ t[k] = round(v, 2)
+ return t
+
+
def assign_network_names_to_compvis_modules(sd_model):
if sd_model is None:
return
@@ -362,7 +369,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
network_layer_name = getattr(self, 'network_layer_name', None)
current_names = getattr(self, "network_current_names", ())
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
- maybe_backup_weights(self, wanted_names)
+ if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
batch_updown = None
batch_ex_bias = None
@@ -414,30 +422,23 @@ def network_load(): # called from processing
if shared.opts.diffusers_offload_mode != "none":
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
+ modules = []
+ for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None and hasattr(component, 'named_modules'):
+ modules += list(component.named_modules())
with pbar:
- for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
- component = getattr(sd_model, component_name, None)
- if component is not None:
- applied = 0
- modules = list(component.named_modules())
- task_start = time.time()
- task = pbar.add_task(description=component_name , total=len(modules), visible=False)
- for _, module in modules:
- layer_name = getattr(module, 'network_layer_name', None)
- if layer_name is None:
- continue
- present = any([net.modules.get(layer_name, None) for net in loaded_networks]) # noqa: C419
- if present:
- network_apply_weights(module)
- applied += 1
- pbar.update(task, advance=1, visible=(time.time() - task_start) > 1) # progress bar becomes visible if operation takes more than 1sec
- pbar.remove_task(task)
- if debug:
- shared.log.debug(f'Load network: type=LoRA component={component_name} modules={len(modules)} applied={applied}')
+ task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules), visible=len(loaded_networks) > 0)
+ for _, module in modules:
+ network_apply_weights(module)
+ # pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.remove_task(task)
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)}')
if shared.opts.diffusers_offload_mode != "none":
sd_models.set_diffuser_offload(sd_model, op="model")
if debug:
- shared.log.debug(f'Load network: type=LoRA total={total_time():.2f} timers={timer}')
+ shared.log.debug(f'Load network: type=LoRA timers{get_timers()}')
def list_available_networks():
diff --git a/modules/processing.py b/modules/processing.py
index 16e7a9213..92faaee8d 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -473,4 +473,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.scripts.postprocess(p, processed)
timer.process.record('post')
shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
+
+ if shared.cmd_opts.malloc:
+ import tracemalloc
+ snapshot = tracemalloc.take_snapshot()
+ stats = snapshot.statistics('lineno')
+ shared.log.debug('Profile malloc:')
+ for stat in stats[:20]:
+ frame = stat.traceback[0]
+ shared.log.debug(f' file="{frame.filename}":{frame.lineno} size={stat.size}')
return processed
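With --malloc set, tracemalloc is started at launch and a snapshot is taken after each processing run, logging the top allocation sites by line. A minimal sketch of the same flow using only the standard library:

    import tracemalloc

    tracemalloc.start()  # normally done once at startup, gated behind the --malloc flag

    data = [bytes(1024) for _ in range(1000)]  # stand-in for real processing work

    snapshot = tracemalloc.take_snapshot()
    for stat in snapshot.statistics('lineno')[:5]:
        frame = stat.traceback[0]
        print(f'file="{frame.filename}":{frame.lineno} size={stat.size}')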
diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py
index 22acf296c..ab08d4cc8 100644
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@@ -1,4 +1,5 @@
import os
+import time
import math
import random
import warnings
@@ -9,7 +10,7 @@
from PIL import Image
from skimage import exposure
from blendmodes.blend import blendLayers, BlendType
-from modules import shared, devices, images, sd_models, sd_samplers, sd_hijack_hypertile, processing_vae
+from modules import shared, devices, images, sd_models, sd_samplers, sd_hijack_hypertile, processing_vae, timer
debug = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -352,6 +353,7 @@ def diffusers_image_conditioning(_source_image, latent_image, _image_mask=None):
def validate_sample(tensor):
+ t0 = time.time()
if not isinstance(tensor, np.ndarray) and not isinstance(tensor, torch.Tensor):
return tensor
dtype = tensor.dtype
@@ -377,6 +379,8 @@ def validate_sample(tensor):
if upcast is not None and not upcast:
setattr(shared.sd_model.vae.config, 'force_upcast', True) # noqa: B010
shared.log.warning('Decode: upcast=True set, retry operation')
+ t1 = time.time()
+ timer.process.add('validate', t1 - t0)
return cast
From eacd4e9357cd44ef1e79640846822ad6d593d48d Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 09:02:47 -0500
Subject: [PATCH 052/162] add stats
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 23c45ff2a..51ef27a8a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -28,6 +28,7 @@
available_network_aliases = {}
loaded_networks: List[network.Network] = []
timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+backup_size = 0
lora_cache = {}
diffuser_loaded = []
diffuser_scales = []
@@ -289,6 +290,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
devices.torch_gc()
t1 = time.time()
+ backup_size = 0
timer['load'] = t1 - t0
@@ -329,6 +331,7 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
+ global backup_size # pylint: disable=W0603
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
@@ -347,6 +350,7 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and weights_backup is not None:
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
+ backup_size += weights_backup.numel() * weights_backup.element_size()
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
@@ -356,6 +360,8 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and bias_backup is not None:
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
+ if bias_backup is not None:
+ backup_size += bias_backup.numel() * bias_backup.element_size()
t1 = time.time()
timer['backup'] += t1 - t0
@@ -431,14 +437,15 @@ def network_load(): # called from processing
task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules), visible=len(loaded_networks) > 0)
for _, module in modules:
network_apply_weights(module)
- # pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
pbar.remove_task(task)
+ modules.clear()
if debug:
shared.log.debug(f'Load network: type=LoRA modules={len(modules)}')
if shared.opts.diffusers_offload_mode != "none":
sd_models.set_diffuser_offload(sd_model, op="model")
if debug:
- shared.log.debug(f'Load network: type=LoRA timers{get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA time={get_timers()} backup={backup_size}')
def list_available_networks():
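backup_size accumulates numel() * element_size() for every weight and bias copy kept for LoRA restore, i.e. the bytes held by the backups. A small sketch of that bookkeeping on a plain torch module:

    import torch

    def backup_bytes(module: torch.nn.Module) -> int:
        total = 0
        for tensor in (getattr(module, 'weight', None), getattr(module, 'bias', None)):
            if tensor is not None:
                total += tensor.numel() * tensor.element_size()  # bytes for one backup copy
        return total

    layer = torch.nn.Linear(4096, 4096)
    print(f'backup={backup_bytes(layer)} bytes')  # 4096*4096*4 + 4096*4 for fp32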
From 6ec93f2d4609f36d3f66a8f4ac4b03bca32e5d12 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 17:04:35 +0300
Subject: [PATCH 053/162] Disable load lora gpu with medvram too
---
modules/sd_models.py | 5 ++++-
modules/shared.py | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 361f6375b..ccba0bfb5 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -413,11 +413,11 @@ def apply_balanced_offload_to_module(pipe):
if checkpoint_name is None:
checkpoint_name = pipe.__class__.__name__
offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ network_layer_name = getattr(module, "network_layer_name", None)
module = remove_hook_from_module(module, recurse=True)
try:
module = module.to("cpu")
module.offload_dir = offload_dir
- network_layer_name = getattr(module, "network_layer_name", None)
module = add_hook_to_module(module, dispatch_from_cpu_hook(), append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
if network_layer_name:
@@ -1455,7 +1455,10 @@ def disable_offload(sd_model):
for module_name in keys: # pylint: disable=protected-access
module = getattr(sd_model, module_name, None)
if isinstance(module, torch.nn.Module):
+ network_layer_name = getattr(module, "network_layer_name", None)
module = remove_hook_from_module(module, recurse=True)
+ if network_layer_name:
+ module.network_layer_name = network_layer_name
sd_model.has_accelerate = False
diff --git a/modules/shared.py b/modules/shared.py
index e213997c7..bcb506cee 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -910,7 +910,7 @@ def get_default_modes():
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
- "lora_load_gpu": OptionInfo(True if not cmd_opts.lowvram else False, "Load LoRA directly to GPU"),
+ "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "Load LoRA directly to GPU"),
"lora_offload_backup": OptionInfo(True, "Offload LoRA Backup Weights"),
}))
From eee85e5a4ed24af589bc9ee488cc4c496d747417 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 12:29:58 -0500
Subject: [PATCH 054/162] lora refactor in progress
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 5 +-
modules/lora/extra_networks_lora.py | 9 ++-
modules/lora/networks.py | 73 ++++++++++++-------------
modules/processing_callbacks.py | 2 -
modules/processing_diffusers.py | 21 ++++---
modules/prompt_parser_diffusers.py | 9 ++-
modules/sd_models.py | 85 ++++++++++++++++-------------
7 files changed, 109 insertions(+), 95 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 919041bde..dcb88bcf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-28
+## Update for 2024-11-30
### New models and integrations
@@ -67,7 +67,8 @@
- fix xyz-grid with lora
- fix api script callbacks
- fix gpu memory monitoring
-- simplify img2img/inpaint/sketch canvas handling
+- simplify img2img/inpaint/sketch canvas handling
+- fix prompt caching
## Update for 2024-11-21
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 3aea659d9..c875ba0d5 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -113,22 +113,21 @@ def __init__(self):
self.errors = {}
def activate(self, p, params_list, step=0):
- t0 = time.time()
self.errors.clear()
if self.active:
if self.model != shared.opts.sd_model_checkpoint: # reset if model changed
self.active = False
if len(params_list) > 0 and not self.active: # activate patches once
- shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
+ # shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
self.active = True
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
- networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims)
- t1 = time.time()
+ networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims) # load
+ networks.network_load() # backup/apply
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
- shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} dims={dyn_dims} load={t1-t0:.2f}')
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.get_timers()}')
def deactivate(self, p):
t0 = time.time()
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 51ef27a8a..86c6e5ed0 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -54,7 +54,8 @@ def total_time():
def get_timers():
t = { 'total': round(sum(timer.values()), 2) }
for k, v in timer.items():
- t[k] = round(v, 2)
+ if v > 0.1:
+ t[k] = round(v, 2)
return t
@@ -216,6 +217,7 @@ def maybe_recompile_model(names, te_multipliers):
def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ global backup_size # pylint: disable=global-statement
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@@ -304,10 +306,9 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
with devices.inference_context():
if weights_backup is not None:
if updown is not None:
- if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9:
- # inpainting model. zero pad updown to make channel[1] 4 to 9
+ if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
- weights_backup = weights_backup.clone().to(device)
+ weights_backup = weights_backup.clone().to(self.weight.device)
weights_backup += updown.to(weights_backup)
if getattr(self, "quant_type", None) in ['nf4', 'fp4']:
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
@@ -375,18 +376,18 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
network_layer_name = getattr(self, 'network_layer_name', None)
current_names = getattr(self, "network_current_names", ())
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
- if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
- maybe_backup_weights(self, wanted_names)
- if current_names != wanted_names:
- batch_updown = None
- batch_ex_bias = None
- t0 = time.time()
- for net in loaded_networks:
- # default workflow where module is known and has weights
- module = net.modules.get(network_layer_name, None)
- if module is not None and hasattr(self, 'weight'):
- try:
- with devices.inference_context():
+ with devices.inference_context():
+ if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ maybe_backup_weights(self, wanted_names)
+ if current_names != wanted_names:
+ batch_updown = None
+ batch_ex_bias = None
+ t0 = time.time()
+ for net in loaded_networks:
+ # default workflow where module is known and has weights
+ module = net.modules.get(network_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ try:
weight = self.weight.to(devices.device) # calculate quant weights once
updown, ex_bias = module.calc_updown(weight)
if batch_updown is not None and updown is not None:
@@ -402,22 +403,22 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
batch_updown = batch_updown.to(devices.cpu)
if batch_ex_bias is not None:
batch_ex_bias = batch_ex_bias.to(devices.cpu)
- except RuntimeError as e:
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if debug:
- module_name = net.modules.get(network_layer_name, None)
- shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
- errors.display(e, 'LoRA')
- raise RuntimeError('LoRA apply weight') from e
- continue
- if module is None:
- continue
- shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- t1 = time.time()
- timer['calc'] += t1 - t0
- set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
- self.network_current_names = wanted_names
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
+ if module is None:
+ continue
+ shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ t1 = time.time()
+ timer['calc'] += t1 - t0
+ set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
+ self.network_current_names = wanted_names
def network_load(): # called from processing
@@ -425,7 +426,7 @@ def network_load(): # called from processing
timer['calc'] = 0
timer['apply'] = 0
    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
- if shared.opts.diffusers_offload_mode != "none":
+ if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
modules = []
@@ -441,11 +442,9 @@ def network_load(): # called from processing
pbar.remove_task(task)
modules.clear()
if debug:
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)}')
- if shared.opts.diffusers_offload_mode != "none":
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)} backup={backup_size} time={get_timers()}')
+ if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
- if debug:
- shared.log.debug(f'Load network: type=LoRA time={get_timers()} backup={backup_size}')
def list_available_networks():
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index e1bf723cc..f3eb0bc37 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -4,7 +4,6 @@
import torch
import numpy as np
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
-from modules.lora.networks import network_load
p = None
@@ -69,7 +68,6 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
time.sleep(0.1)
if hasattr(p, "stepwise_lora") and shared.native:
extra_networks.activate(p, p.extra_network_data, step=step)
- network_load()
if latents is None:
return kwargs
elif shared.opts.nan_skip:
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index ae24f5f80..463a15280 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -199,11 +199,6 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
        output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannot deal with latent input
p.task_args['image'] = output.images # replace so hires uses new output
- sd_models.move_model(shared.sd_model, devices.device)
- if hasattr(shared.sd_model, 'unet'):
- sd_models.move_model(shared.sd_model.unet, devices.device)
- if hasattr(shared.sd_model, 'transformer'):
- sd_models.move_model(shared.sd_model.transformer, devices.device)
update_sampler(p, shared.sd_model, second_pass=True)
orig_denoise = p.denoising_strength
p.denoising_strength = strength
@@ -227,6 +222,11 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
shared.state.job = 'HiRes'
shared.state.sampling_steps = hires_args.get('prior_num_inference_steps', None) or p.steps or hires_args.get('num_inference_steps', None)
try:
+ sd_models.move_model(shared.sd_model, devices.device)
+ if hasattr(shared.sd_model, 'unet'):
+ sd_models.move_model(shared.sd_model.unet, devices.device)
+ if hasattr(shared.sd_model, 'transformer'):
+ sd_models.move_model(shared.sd_model.transformer, devices.device)
sd_models_compile.check_deepcache(enable=True)
output = shared.sd_model(**hires_args) # pylint: disable=not-callable
if isinstance(output, dict):
@@ -405,6 +405,9 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+
# sanitize init_images
if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
del p.init_images
@@ -427,10 +430,6 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
if p.negative_prompts is None or len(p.negative_prompts) == 0:
p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
- # load loras
- networks.network_load()
-
- sd_models.move_model(shared.sd_model, devices.device)
sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
if 'base' not in p.skip:
@@ -461,6 +460,10 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
timer.process.add('lora', networks.total_time())
shared.sd_model = orig_pipeline
+
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+
if p.state == '':
global last_p # pylint: disable=global-statement
last_p = p
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 2edef4bf5..c74731c6d 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -16,6 +16,7 @@
token_dict = None # used by helper get_tokens
token_type = None # used by helper get_tokens
cache = OrderedDict()
+last_attention = None
embedder = None
@@ -52,7 +53,7 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.prompts = prompts
self.negative_prompts = negative_prompts
self.batchsize = len(self.prompts)
- self.attention = None
+ self.attention = last_attention
self.allsame = self.compare_prompts() # collapses batched prompts to single prompt if possible
self.steps = steps
self.clip_skip = clip_skip
@@ -78,6 +79,8 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.scheduled_encode(pipe, batchidx)
else:
self.encode(pipe, prompt, negative_prompt, batchidx)
+ if shared.opts.diffusers_offload_mode == "balanced":
+ pipe = sd_models.apply_balanced_offload(pipe)
self.checkcache(p)
debug(f"Prompt encode: time={(time.time() - t0):.3f}")
@@ -113,6 +116,7 @@ def flatten(xss):
debug(f"Prompt cache: add={key}")
while len(cache) > int(shared.opts.sd_textencoder_cache_size):
cache.popitem(last=False)
+ return True
if item:
self.__dict__.update(cache[key])
cache.move_to_end(key)
@@ -161,7 +165,9 @@ def extend_embeds(self, batchidx, idx): # Extends scheduled prompt via index
self.negative_pooleds[batchidx].append(self.negative_pooleds[batchidx][idx])
def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
+ global last_attention # pylint: disable=global-statement
self.attention = shared.opts.prompt_attention
+ last_attention = self.attention
if self.attention == "xhinker":
prompt_embed, positive_pooled, negative_embed, negative_pooled = get_xhinker_text_embeddings(pipe, positive_prompt, negative_prompt, self.clip_skip)
else:
@@ -178,7 +184,6 @@ def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
if debug_enabled:
get_tokens(pipe, 'positive', positive_prompt)
get_tokens(pipe, 'negative', negative_prompt)
- pipe = prepare_model()
def __call__(self, key, step=0):
batch = getattr(self, key)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ccba0bfb5..2cf7b3931 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -13,6 +13,7 @@
from rich import progress # pylint: disable=redefined-builtin
import torch
import safetensors.torch
+import accelerate
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
@@ -310,6 +311,7 @@ def set_accelerate(sd_model):
def set_diffuser_offload(sd_model, op: str = 'model'):
+ t0 = time.time()
if not shared.native:
shared.log.warning('Attempting to use offload with backend=original')
return
@@ -363,41 +365,50 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
sd_model = apply_balanced_offload(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
+ process_timer.add('offload', time.time() - t0)
+
+
+class OffloadHook(accelerate.hooks.ModelHook):
+ def init_hook(self, module):
+ return module
+
+ def pre_forward(self, module, *args, **kwargs):
+ if devices.normalize_device(module.device) != devices.normalize_device(devices.device):
+ device_index = torch.device(devices.device).index
+ if device_index is None:
+ device_index = 0
+ max_memory = {
+ device_index: f"{shared.opts.diffusers_offload_max_gpu_memory}GiB",
+ "cpu": f"{shared.opts.diffusers_offload_max_cpu_memory}GiB",
+ }
+ device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
+ module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
+ offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
+ module = accelerate.dispatch_model(module, device_map=device_map, offload_dir=offload_dir)
+ module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
+ module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ return args, kwargs
+
+ def post_forward(self, module, output):
+ return output
+
+ def detach_hook(self, module):
+ return module
+
+
+offload_hook_instance = OffloadHook()
def apply_balanced_offload(sd_model):
- from accelerate import infer_auto_device_map, dispatch_model
- from accelerate.hooks import add_hook_to_module, remove_hook_from_module, ModelHook
+ t0 = time.time()
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
return sd_model
-
- class dispatch_from_cpu_hook(ModelHook):
- def init_hook(self, module):
- return module
-
- def pre_forward(self, module, *args, **kwargs):
- if devices.normalize_device(module.device) != devices.normalize_device(devices.device):
- device_index = torch.device(devices.device).index
- if device_index is None:
- device_index = 0
- max_memory = {
- device_index: f"{shared.opts.diffusers_offload_max_gpu_memory}GiB",
- "cpu": f"{shared.opts.diffusers_offload_max_cpu_memory}GiB",
- }
- device_map = infer_auto_device_map(module, max_memory=max_memory)
- module = remove_hook_from_module(module, recurse=True)
- offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
- module = dispatch_model(module, device_map=device_map, offload_dir=offload_dir)
- module = add_hook_to_module(module, dispatch_from_cpu_hook(), append=True)
- module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- return args, kwargs
-
- def post_forward(self, module, output):
- return output
-
- def detach_hook(self, module):
- return module
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+ debug_move(f'Apply offload: type=balanced fn={fn}')
+ checkpoint_name = sd_model.sd_checkpoint_info.name if getattr(sd_model, "sd_checkpoint_info", None) is not None else None
+ if checkpoint_name is None:
+ checkpoint_name = sd_model.__class__.__name__
def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "pipe"):
@@ -409,23 +420,19 @@ def apply_balanced_offload_to_module(pipe):
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
- checkpoint_name = pipe.sd_checkpoint_info.name if getattr(pipe, "sd_checkpoint_info", None) is not None else None
- if checkpoint_name is None:
- checkpoint_name = pipe.__class__.__name__
- offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
network_layer_name = getattr(module, "network_layer_name", None)
- module = remove_hook_from_module(module, recurse=True)
+ module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
- module = module.to("cpu")
- module.offload_dir = offload_dir
- module = add_hook_to_module(module, dispatch_from_cpu_hook(), append=True)
+ module = module.to(devices.cpu, non_blocking=True)
+ module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ # module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
+ module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
if network_layer_name:
module.network_layer_name = network_layer_name
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
- devices.torch_gc(fast=True)
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
@@ -435,6 +442,8 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(sd_model, "decoder_pipe"):
apply_balanced_offload_to_module(sd_model.decoder_pipe)
set_accelerate(sd_model)
+ devices.torch_gc(fast=True)
+ process_timer.add('offload', time.time() - t0)
return sd_model
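
For context on the `OffloadHook` introduced above: accelerate model hooks wrap a module's forward call, and `pre_forward` is the point where the hook can move or re-dispatch the module before it runs. A minimal, self-contained sketch of the attach-and-trigger flow; `LogHook` is a hypothetical example, only the accelerate API calls match what the patch uses.

```python
import torch
import accelerate

class LogHook(accelerate.hooks.ModelHook):
    def pre_forward(self, module, *args, **kwargs):
        # runs right before the module's forward; OffloadHook uses this point
        # to re-dispatch an offloaded module back onto the execution device
        print(f"forward on {module.__class__.__name__} device={next(module.parameters()).device}")
        return args, kwargs

m = torch.nn.Linear(4, 4)
m = accelerate.hooks.add_hook_to_module(m, LogHook(), append=True)
m(torch.randn(1, 4))  # LogHook.pre_forward fires here
```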
From b55e746fca34a53effb2219aeef263cbbdb6b03c Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 20:49:57 +0300
Subject: [PATCH 055/162] Improve balanced offload pre forward performance
---
modules/model_stablecascade.py | 8 +++-----
modules/sd_models.py | 18 ++++++++++++------
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/modules/model_stablecascade.py b/modules/model_stablecascade.py
index 6c23ea00a..d6f9e4266 100644
--- a/modules/model_stablecascade.py
+++ b/modules/model_stablecascade.py
@@ -330,14 +330,12 @@ def __call__(
elif output_type == "pil":
        images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16 -> numpy doesn't work
images = self.numpy_to_pil(images)
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
else:
images = latents
- # Offload all models
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
- else:
- self.maybe_free_model_hooks()
+ self.maybe_free_model_hooks()
if not return_dict:
return images
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 2cf7b3931..c2c789987 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -378,15 +378,17 @@ def pre_forward(self, module, *args, **kwargs):
if device_index is None:
device_index = 0
max_memory = {
- device_index: f"{shared.opts.diffusers_offload_max_gpu_memory}GiB",
- "cpu": f"{shared.opts.diffusers_offload_max_cpu_memory}GiB",
+ device_index: int(shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024),
+ "cpu": int(shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024),
}
- device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
- module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
+ device_map = getattr(module, "balanced_offload_device_map", None)
+ if device_map is None or max_memory != getattr(module, "balanced_offload_max_memory", None):
+ device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
module = accelerate.dispatch_model(module, device_map=device_map, offload_dir=offload_dir)
- module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
return args, kwargs
def post_forward(self, module, output):
@@ -421,15 +423,19 @@ def apply_balanced_offload_to_module(pipe):
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
+ device_map = getattr(module, "balanced_offload_device_map", None)
+ max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
module = module.to(devices.cpu, non_blocking=True)
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
- # module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
if network_layer_name:
module.network_layer_name = network_layer_name
+ if device_map and max_memory:
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
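
The performance gain above comes from memoizing the device map on the module: `accelerate.infer_auto_device_map` is recomputed only when the memory budget changes. A condensed sketch of that caching, restated outside the hook for clarity (`get_device_map` is a hypothetical helper name).

```python
import accelerate

def get_device_map(module, max_memory):
    # reuse the map stored on the module unless the memory budget changed
    cached = getattr(module, "balanced_offload_device_map", None)
    if cached is None or getattr(module, "balanced_offload_max_memory", None) != max_memory:
        cached = accelerate.infer_auto_device_map(module, max_memory=max_memory)
        module.balanced_offload_device_map = cached
        module.balanced_offload_max_memory = max_memory
    return cached
```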
From 81a95d04cf6db49433c9920efabb90cca3165734 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 21:12:26 +0300
Subject: [PATCH 056/162] Skip apply_balanced_offload if not needed
---
modules/sd_models.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c2c789987..6c3ddc6b5 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -389,6 +389,7 @@ def pre_forward(self, module, *args, **kwargs):
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
+ module.balanced_offload_active = True
return args, kwargs
def post_forward(self, module, output):
@@ -421,7 +422,8 @@ def apply_balanced_offload_to_module(pipe):
keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- if isinstance(module, torch.nn.Module):
+ balanced_offload_active = getattr(module, "balanced_offload_active", None)
+ if isinstance(module, torch.nn.Module) and (balanced_offload_active is None or balanced_offload_active):
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
@@ -431,6 +433,7 @@ def apply_balanced_offload_to_module(pipe):
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ module.balanced_offload_active = False
if network_layer_name:
module.network_layer_name = network_layer_name
if device_map and max_memory:
@@ -1471,6 +1474,8 @@ def disable_offload(sd_model):
module = getattr(sd_model, module_name, None)
if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
+ if getattr(module, "balanced_offload_active", None) is not None:
+ module.balanced_offload_active = None
module = remove_hook_from_module(module, recurse=True)
if network_layer_name:
module.network_layer_name = network_layer_name
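
The skip added here relies on a tri-state `balanced_offload_active` attribute tracked per module; a short sketch of how the flag is read (`should_offload` is a hypothetical helper, the condition mirrors the diff).

```python
def should_offload(module) -> bool:
    active = getattr(module, "balanced_offload_active", None)
    # None  -> module has never been through balanced offload yet
    # True  -> pre_forward dispatched it to the GPU, so it needs offloading again
    # False -> it is already offloaded; skip the redundant CPU move
    return active is None or active
```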
From bccb277dcb15b29f696500f854e59a31694ac235 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 17:26:49 -0500
Subject: [PATCH 057/162] update lora apply weights and xyz
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 81 ++++++++++++++++++++++------------------
modules/processing.py | 3 +-
scripts/xyz_grid_on.py | 1 +
3 files changed, 48 insertions(+), 37 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 86c6e5ed0..b06a0c81f 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -27,8 +27,9 @@
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
-timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'move': 0, 'restore': 0, 'deactivate': 0 }
backup_size = 0
+bnb = None
lora_cache = {}
diffuser_loaded = []
diffuser_scales = []
@@ -302,42 +303,41 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return
- device = self.weight.device
- with devices.inference_context():
- if weights_backup is not None:
- if updown is not None:
- if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
- updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
- weights_backup = weights_backup.clone().to(self.weight.device)
- weights_backup += updown.to(weights_backup)
- if getattr(self, "quant_type", None) in ['nf4', 'fp4']:
- bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
- if bnb is not None:
- self.weight = bnb.nn.Params4bit(weights_backup, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
- else:
- self.weight.copy_(weights_backup, non_blocking=True)
+ if weights_backup is not None:
+ if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
+ updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
+ if updown is not None:
+ new_weight = updown.to(devices.device) + weights_backup.to(devices.device)
+ if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
+ self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight.copy_(weights_backup, non_blocking=True)
- if hasattr(self, "qweight") and hasattr(self, "freeze"):
- self.freeze()
- if bias_backup is not None:
- if ex_bias is not None:
- bias_backup = bias_backup.clone() + ex_bias.to(weights_backup)
- self.bias.copy_(bias_backup)
+ self.weight.copy_(new_weight, non_blocking=True)
+ del new_weight
else:
- self.bias = None
- self.to(device)
+ self.weight.copy_(weights_backup, non_blocking=True)
+ if hasattr(self, "qweight") and hasattr(self, "freeze"):
+ self.freeze()
+ if bias_backup is not None:
+ if ex_bias is not None:
+ new_weight = ex_bias.to(self.bias.device) + bias_backup.to(self.device)
+ self.bias.copy_(new_weight, non_blocking=True)
+ del new_weight
+ else:
+ self.bias.copy_(bias_backup, non_blocking=True)
+ else:
+ self.bias = None
t1 = time.time()
timer['apply'] += t1 - t0
def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
- global backup_size # pylint: disable=W0603
+ global bnb, backup_size # pylint: disable=W0603
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
- bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is None:
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
if bnb is not None:
with devices.inference_context():
weights_backup = bnb.functional.dequantize_4bit(self.weight, quant_state=self.weight.quant_state, quant_type=self.weight.quant_type, blocksize=self.weight.blocksize,)
@@ -375,21 +375,27 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
"""
network_layer_name = getattr(self, 'network_layer_name', None)
current_names = getattr(self, "network_current_names", ())
- wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
+ wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
with devices.inference_context():
- if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
+ if shared.opts.diffusers_offload_mode == "none":
+ self.to(devices.device, non_blocking=True)
batch_updown = None
batch_ex_bias = None
- t0 = time.time()
for net in loaded_networks:
- # default workflow where module is known and has weights
module = net.modules.get(network_layer_name, None)
if module is not None and hasattr(self, 'weight'):
try:
- weight = self.weight.to(devices.device) # calculate quant weights once
+ t0 = time.time()
+ weight = self.weight.to(devices.device, non_blocking=True) # calculate quant weights once
+ t1 = time.time()
updown, ex_bias = module.calc_updown(weight)
+ del weight
+ t2 = time.time()
+ timer['move'] += t1 - t0
+ timer['calc'] += t2 - t1
if batch_updown is not None and updown is not None:
batch_updown += updown
else:
@@ -399,10 +405,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
batch_ex_bias = ex_bias
if shared.opts.diffusers_offload_mode != "none":
+ t0 = time.time()
if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu)
+ batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu)
+ batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
+ t1 = time.time()
+ timer['move'] += t1 - t0
except RuntimeError as e:
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
if debug:
@@ -415,16 +424,16 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- t1 = time.time()
- timer['calc'] += t1 - t0
set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
+ # self.to(devices.cpu)
-def network_load(): # called from processing
+def network_load():
timer['backup'] = 0
timer['calc'] = 0
timer['apply'] = 0
+ timer['move'] = 0
    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
diff --git a/modules/processing.py b/modules/processing.py
index 92faaee8d..ebbaf7272 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -472,7 +472,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner) and not (shared.state.interrupted or shared.state.skipped):
p.scripts.postprocess(p, processed)
timer.process.record('post')
- shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
+ if not p.disable_extra_networks:
+ shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
if shared.cmd_opts.malloc:
import tracemalloc
diff --git a/scripts/xyz_grid_on.py b/scripts/xyz_grid_on.py
index 202a2cfc4..aa0897442 100644
--- a/scripts/xyz_grid_on.py
+++ b/scripts/xyz_grid_on.py
@@ -413,6 +413,7 @@ def cell(x, y, z, ix, iy, iz):
p.do_not_save_grid = True
p.do_not_save_samples = True
+ p.disable_extra_networks = True
active = False
cache = processed
return processed
From 22982f3126ecd829b037ed7359556383f93fcffe Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 18:10:10 -0500
Subject: [PATCH 058/162] update stats
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 +++
modules/lora/networks.py | 42 +++++++++++++++++++++++++--------------
modules/processing_vae.py | 2 +-
3 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dcb88bcf3..9ab3bcbd1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,8 @@
- Flux: all-in-one safetensors
example:
- Flux: do not recast quants
+- **Offload** improvements:
+ - faster and more compatible *balanced* mode
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
@@ -69,6 +71,7 @@
- fix gpu memory monitoring
- simplify img2img/inpaint/sketch canvas handling
- fix prompt caching
+- fix xyz grid skip final pass
## Update for 2024-11-21
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index b06a0c81f..604e591a9 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -3,9 +3,10 @@
import re
import time
import concurrent
+from contextlib import nullcontext
import torch
import diffusers.models.lora
-import rich.progress as p
+import rich.progress as rp
import modules.lora.network as network
import modules.lora.network_lora as network_lora
@@ -22,7 +23,6 @@
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
-pbar = p.Progress(p.TextColumn('[cyan]{task.description}'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), console=shared.console)
extra_network_lora = None
available_networks = {}
available_network_aliases = {}
@@ -307,7 +307,7 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- new_weight = updown.to(devices.device) + weights_backup.to(devices.device)
+ new_weight = updown.to(devices.device, non_blocking=True) + weights_backup.to(devices.device, non_blocking=True)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -319,7 +319,7 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
self.freeze()
if bias_backup is not None:
if ex_bias is not None:
- new_weight = ex_bias.to(self.bias.device) + bias_backup.to(self.device)
+ new_weight = ex_bias.to(devices.device, non_blocking=True) + bias_backup.to(devices.device, non_blocking=True)
self.bias.copy_(new_weight, non_blocking=True)
del new_weight
else:
@@ -351,7 +351,6 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and weights_backup is not None:
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
- backup_size += weights_backup.numel() * weights_backup.element_size()
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
@@ -361,8 +360,10 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and bias_backup is not None:
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
- if bias_backup is not None:
- backup_size += bias_backup.numel() * bias_backup.element_size()
+ if getattr(self, 'network_weights_backup', None) is not None:
+ backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
+ if getattr(self, 'network_bias_backup', None) is not None:
+ backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
t1 = time.time()
timer['backup'] += t1 - t0
@@ -424,9 +425,11 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
- # self.to(devices.cpu)
+ set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
+ if batch_updown is not None or batch_ex_bias is not None:
+ return self.weight.device
+ return None
def network_load():
@@ -443,15 +446,24 @@ def network_load():
component = getattr(sd_model, component_name, None)
if component is not None and hasattr(component, 'named_modules'):
modules += list(component.named_modules())
+ devices_used = []
+ if len(loaded_networks) > 0:
+ pbar = rp.Progress(rp.TextColumn('[cyan]{task.description}'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), console=shared.console)
+ task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules))
+ else:
+ task = None
+ pbar = nullcontext()
with pbar:
- task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules), visible=len(loaded_networks) > 0)
for _, module in modules:
- network_apply_weights(module)
- pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
- pbar.remove_task(task)
- modules.clear()
+ devices_used.append(network_apply_weights(module))
+ if task is not None:
+ pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ # pbar.remove_task(task)
if debug:
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)} backup={backup_size} time={get_timers()}')
+ devices_used = [d for d in devices_used if d is not None]
+ devices_set = list(set(devices_used))
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)} apply={len(devices_used)} device={devices_set} backup={backup_size} time={get_timers()}')
+ modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 1c4a45f07..b114e01d3 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -117,7 +117,7 @@ def full_vae_decode(latents, model):
model.vae.orig_dtype = model.vae.dtype
model.vae = model.vae.to(dtype=torch.float32)
latents = latents.to(torch.float32)
- latents = latents.to(devices.device)
+ latents = latents.to(devices.device, non_blocking=True)
if getattr(model.vae, "post_quant_conv", None) is not None:
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
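
The `backup=` figure added to the debug log above is plain tensor bookkeeping: a tensor occupies `numel() * element_size()` bytes. A self-contained sketch with made-up shapes.

```python
import torch

def tensor_bytes(t: torch.Tensor) -> int:
    return t.numel() * t.element_size()

w = torch.randn(320, 320, dtype=torch.float16)  # hypothetical weight backup
b = torch.randn(320, dtype=torch.float16)       # hypothetical bias backup
backup_size = tensor_bytes(w) + tensor_bytes(b)
print(f"backup={backup_size} bytes")            # reported in the LoRA load debug line
```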
From b7aff134a2f50f41b0371489506408a7ca600e85 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 19:03:51 -0500
Subject: [PATCH 059/162] add low/high threshold to balanced offload
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 2 ++
modules/devices.py | 3 ++-
modules/lora/networks.py | 1 +
modules/processing_helpers.py | 5 ++---
modules/sd_models.py | 27 ++++++++++++++++++++-------
modules/shared.py | 5 +++--
6 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ab3bcbd1..c62b6b917 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,8 @@
- Flux: do not recast quants
- **Offload** improvements:
- faster and more compatible *balanced* mode
+ - balanced offload: units are now in percentage instead of bytes
+ - balanced offload: add both high and low watermark
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
diff --git a/modules/devices.py b/modules/devices.py
index 9ca1863a5..64968a30c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -224,7 +224,7 @@ def torch_gc(force=False, fast=False):
timer.process.records['gc'] = 0
timer.process.records['gc'] += t1 - t0
if not force or collected == 0:
- return
+ return used_gpu
mem = memstats.memory_stats()
saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2)
before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) }
@@ -233,6 +233,7 @@ def torch_gc(force=False, fast=False):
results = { 'collected': collected, 'saved': saved }
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}') # pylint: disable=protected-access
+ return used_gpu
def set_cuda_sync_mode(mode):
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 604e591a9..69db5fce3 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -218,6 +218,7 @@ def maybe_recompile_model(names, te_multipliers):
def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ timer['list'] = 0
global backup_size # pylint: disable=global-statement
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py
index ab08d4cc8..5d2661cc2 100644
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@@ -368,14 +368,13 @@ def validate_sample(tensor):
sample = 255.0 * np.moveaxis(sample, 0, 2) if not shared.native else 255.0 * sample
with warnings.catch_warnings(record=True) as w:
cast = sample.astype(np.uint8)
- minimum, maximum, mean = np.min(cast), np.max(cast), np.mean(cast)
- if len(w) > 0 or minimum == maximum:
+ if len(w) > 0:
nans = np.isnan(sample).sum()
cast = np.nan_to_num(sample)
cast = cast.astype(np.uint8)
vae = shared.sd_model.vae.dtype if hasattr(shared.sd_model, 'vae') else None
upcast = getattr(shared.sd_model.vae.config, 'force_upcast', None) if hasattr(shared.sd_model, 'vae') and hasattr(shared.sd_model.vae, 'config') else None
- shared.log.error(f'Decode: sample={sample.shape} invalid={nans} mean={mean} dtype={dtype} vae={vae} upcast={upcast} failed to validate')
+ shared.log.error(f'Decode: sample={sample.shape} invalid={nans} dtype={dtype} vae={vae} upcast={upcast} failed to validate')
if upcast is not None and not upcast:
setattr(shared.sd_model.vae.config, 'force_upcast', True) # noqa: B010
shared.log.warning('Decode: upcast=True set, retry operation')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 6c3ddc6b5..42bd33d82 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -361,7 +361,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if shared.opts.diffusers_offload_mode == "balanced":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} threshold={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} watermarks low={shared.opts.diffusers_offload_min_gpu_memory} high={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction:.2f}')
sd_model = apply_balanced_offload(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
@@ -369,6 +369,16 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
class OffloadHook(accelerate.hooks.ModelHook):
+ def __init__(self):
+ if shared.opts.diffusers_offload_max_gpu_memory > 1:
+ shared.opts.diffusers_offload_max_gpu_memory = 0.75
+ if shared.opts.diffusers_offload_max_cpu_memory > 1:
+ shared.opts.diffusers_offload_max_cpu_memory = 0.75
+ self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
+ self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
+ shared.log.info(f'Init offload: type=balanced gpu={self.gpu} cpu={self.cpu}')
+ super().__init__()
+
def init_hook(self, module):
return module
@@ -377,10 +387,7 @@ def pre_forward(self, module, *args, **kwargs):
device_index = torch.device(devices.device).index
if device_index is None:
device_index = 0
- max_memory = {
- device_index: int(shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024),
- "cpu": int(shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024),
- }
+ max_memory = { device_index: self.gpu, "cpu": self.cpu }
device_map = getattr(module, "balanced_offload_device_map", None)
if device_map is None or max_memory != getattr(module, "balanced_offload_max_memory", None):
device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
@@ -399,10 +406,13 @@ def detach_hook(self, module):
return module
-offload_hook_instance = OffloadHook()
+offload_hook_instance = None
def apply_balanced_offload(sd_model):
+ global offload_hook_instance # pylint: disable=global-statement
+ if offload_hook_instance is None:
+ offload_hook_instance = OffloadHook()
t0 = time.time()
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
@@ -414,6 +424,7 @@ def apply_balanced_offload(sd_model):
checkpoint_name = sd_model.__class__.__name__
def apply_balanced_offload_to_module(pipe):
+ used_gpu = devices.torch_gc(fast=True)
if hasattr(pipe, "pipe"):
apply_balanced_offload_to_module(pipe.pipe)
if hasattr(pipe, "_internal_dict"):
@@ -429,7 +440,9 @@ def apply_balanced_offload_to_module(pipe):
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
- module = module.to(devices.cpu, non_blocking=True)
+ if used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory:
+ module = module.to(devices.cpu, non_blocking=True)
+ used_gpu = devices.torch_gc(fast=True)
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
diff --git a/modules/shared.py b/modules/shared.py
index bcb506cee..21a70fea1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -560,8 +560,9 @@ def get_default_modes():
"diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible"),
"diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
- "diffusers_offload_max_gpu_memory": OptionInfo(round(gpu_memory * 0.75, 1), "Max GPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": gpu_memory, "step": 0.01, "visible": True }),
- "diffusers_offload_max_cpu_memory": OptionInfo(round(cpu_memory * 0.75, 1), "Max CPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": cpu_memory, "step": 0.01, "visible": False }),
+ "diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.75, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(0.75, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
"diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
"diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling"),
From 05639ca238857604c5516a38334ea0a950fab01c Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 01:12:28 +0100
Subject: [PATCH 060/162] prompt token counter fix
---
javascript/black-teal-reimagined.css | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index b7567ce75..0a985b786 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -985,9 +985,10 @@ svg.feather.feather-image,
#txt2img_token_counter, #txt2img_negative_token_counter {
display: flex;
- flex-direction: column;
- justify-content: space-evenly;
- padding: 5px;
+ flex-direction: row;
+ padding-top: 1px;
+ opacity: 0.6;
+ z-index: 99;
}
#txt2img_prompt_container {
From c5cd3cb623e1e7999a161024dfc97b01d34d9531 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 19:15:50 -0500
Subject: [PATCH 061/162] reinit offload instance on change
Signed-off-by: Vladimir Mandic
---
modules/sd_models.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 42bd33d82..24ceff5ee 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -374,9 +374,14 @@ def __init__(self):
shared.opts.diffusers_offload_max_gpu_memory = 0.75
if shared.opts.diffusers_offload_max_cpu_memory > 1:
shared.opts.diffusers_offload_max_cpu_memory = 0.75
+ self.min_watermark = shared.opts.diffusers_offload_min_gpu_memory
+ self.max_watermark = shared.opts.diffusers_offload_max_gpu_memory
+ self.cpu_watermark = shared.opts.diffusers_offload_max_cpu_memory
self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
- shared.log.info(f'Init offload: type=balanced gpu={self.gpu} cpu={self.cpu}')
+ gpu_dict = { "min": self.min_watermark, "max": self.max_watermark, "bytes": self.gpu }
+ cpu_dict = { "max": self.cpu_watermark, "bytes": self.cpu }
+ shared.log.info(f'Init offload: type=balanced gpu={gpu_dict} cpu={cpu_dict}')
super().__init__()
def init_hook(self, module):
@@ -411,7 +416,7 @@ def detach_hook(self, module):
def apply_balanced_offload(sd_model):
global offload_hook_instance # pylint: disable=global-statement
- if offload_hook_instance is None:
+ if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
offload_hook_instance = OffloadHook()
t0 = time.time()
excluded = ['OmniGenPipeline']
From 03b362e799f7934c792eafceaa9c8367ffceb0aa Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 01:23:13 +0100
Subject: [PATCH 062/162] small changes and removed useless css
---
javascript/black-teal-reimagined.css | 15 ++-------------
1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 0a985b786..25f4a3f2c 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -392,13 +392,13 @@ input[type='range']::-moz-range-track {
}
/* Form Styles */
-div.form {
+div.form, #txt2img_seed_row, #txt2img_subseed_row {
border-width: 0;
box-shadow: var(--shadow-md);
background: var(--background-fill-primary);
border-bottom: 3px solid var(--highlight-color);
padding: 3px;
- border-radius: var(--radius-md);
+ border-radius: var(--radius-lg);
margin: 1px;
}
@@ -700,17 +700,6 @@ svg.feather.feather-image,
height: 2.4em;
}
-#footer,
-#style_pos_col,
-#style_neg_col,
-#roll_col,
-#extras_upscaler_2,
-#extras_upscaler_2_visibility,
-#txt2img_seed_resize_from_w,
-#txt2img_seed_resize_from_h {
- display: none;
-}
-
#save-animation {
border-radius: var(--radius-sm) !important;
margin-bottom: 16px;
From 4089af7c032fe06d98fbc5b3801ccd5fc7d5396b Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sun, 1 Dec 2024 03:52:57 +0300
Subject: [PATCH 063/162] Fix NaNs on Intel with Lora + Offloading
---
modules/lora/networks.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 69db5fce3..4312f3405 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -412,6 +412,9 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
if batch_ex_bias is not None:
batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
+ if devices.backend == "ipex":
+ # using non_blocking=True here causes NaNs on Intel
+ torch.xpu.synchronize(devices.device)
t1 = time.time()
timer['move'] += t1 - t0
except RuntimeError as e:
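
The NaN fix above follows the usual rule for asynchronous copies: a tensor moved with `non_blocking=True` must be synchronized before its contents are consumed, and on Intel XPU that means an explicit `torch.xpu.synchronize`. A minimal sketch of the pattern, assuming SD.Next's `modules.devices` helper is importable and an IPEX backend is active.

```python
import torch
from modules import devices  # SD.Next helper, as used in the patch

updown = torch.randn(1024, 1024, device=devices.device)
updown = updown.to(devices.cpu, non_blocking=True)  # async copy, may still be in flight
if devices.backend == "ipex":
    torch.xpu.synchronize(devices.device)           # wait for the copy before reading the data
```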
From e74c038f6405a33753fb12a4e516367cf808b551 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 19:53:08 -0500
Subject: [PATCH 064/162] interruptible lora apply
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 2 ++
scripts/xyz_grid_shared.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 4312f3405..83a62eb5a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -459,6 +459,8 @@ def network_load():
pbar = nullcontext()
with pbar:
for _, module in modules:
+ if shared.state.interrupted:
+ continue
devices_used.append(network_apply_weights(module))
if task is not None:
pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
diff --git a/scripts/xyz_grid_shared.py b/scripts/xyz_grid_shared.py
index 82387fab8..3fc8d32c8 100644
--- a/scripts/xyz_grid_shared.py
+++ b/scripts/xyz_grid_shared.py
@@ -192,7 +192,7 @@ def apply_vae(p, x, xs):
def list_lora():
import sys
- lora = [v for k, v in sys.modules.items() if k == 'networks'][0]
+ lora = [v for k, v in sys.modules.items() if k == 'networks' or k == 'modules.lora.networks'][0]
loras = [v.fullname for v in lora.available_networks.values()]
return ['None'] + loras
From 04c82501d503075edf7afd68157dce450491dc10 Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 02:19:47 +0100
Subject: [PATCH 065/162] darker colors and fancy live preview
---
javascript/black-teal-reimagined.css | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 25f4a3f2c..7b3a281ee 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -54,8 +54,8 @@ html {
/* Background Colors */
--background-color: var(--neutral-950);
- --background-fill-primary: var(--neutral-700);
- --input-background-fill: var(--neutral-800);
+ --background-fill-primary: var(--neutral-800);
+ --input-background-fill: var(--neutral-900);
/* Padding and Borders */
--input-padding: 4px;
@@ -402,6 +402,30 @@ div.form, #txt2img_seed_row, #txt2img_subseed_row {
margin: 1px;
}
+/* Image preview styling */
+#txt2img_gallery {
+ background: var(--background-fill-primary);
+ padding: 5px;
+ margin: 0px;
+}
+
+@keyframes colorChange {
+ 0% {
+ background-color: var(--neutral-800);
+ }
+ 50% {
+ background-color: var(--neutral-700);
+ }
+ 100% {
+ background-color: var(--neutral-800);
+ }
+}
+
+.livePreview {
+ animation: colorChange 3s ease-in-out infinite; /* Adjust the duration as needed */
+ padding: 5px;
+}
+
/* Gradio Style Classes */
fieldset .gr-block.gr-box,
label.block span {
From 75462dd21e71c87d0b619e7898df52d6a6dba434 Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 02:39:56 +0100
Subject: [PATCH 066/162] Fancy loader
---
javascript/black-teal-reimagined.css | 111 ++++++++++++++++++++++++++-
1 file changed, 109 insertions(+), 2 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 7b3a281ee..28176d247 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -994,8 +994,116 @@ svg.feather.feather-image,
height: 100%;
}
-/* Token counters styling */
+/* loader */
+.splash {
+ position: fixed;
+ top: 0;
+ left: 0;
+ width: 100vw;
+ height: 100vh;
+ z-index: 1000;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ justify-content: center;
+ background-color: rgba(0, 0, 0, 0.8);
+}
+
+.motd {
+ margin-top: 1em;
+ color: var(--body-text-color-subdued);
+ font-family: monospace;
+ font-variant: all-petite-caps;
+ font-size: 1.2em;
+}
+
+.splash-img {
+ margin: 0;
+ width: 512px;
+ height: 512px;
+ background-repeat: no-repeat;
+ animation: color 8s infinite alternate, move 3s infinite alternate;
+}
+
+.loading {
+ color: white;
+ position: border-box;
+ top: 85%;
+ font-size: 1.5em;
+}
+
+.loader {
+ width: 100px;
+ height: 100px;
+ border: var(--spacing-md) solid transparent;
+ border-radius: 50%;
+ border-top: var(--spacing-md) solid var(--primary-600);
+ animation: spin 2s linear infinite, pulse 1.5s ease-in-out infinite;
+ position: border-box;
+}
+
+.loader::before,
+.loader::after {
+ content: "";
+ position: absolute;
+ top: 6px;
+ bottom: 6px;
+ left: 6px;
+ right: 6px;
+ border-radius: 50%;
+ border: var(--spacing-md) solid transparent;
+}
+
+.loader::before {
+ border-top-color: var(--primary-900);
+ animation: spin 3s linear infinite;
+}
+
+.loader::after {
+ border-top-color: var(--primary-300);
+ animation: spin 1.5s linear infinite;
+}
+@keyframes move {
+ 0% {
+ transform: translateY(0);
+ }
+ 50% {
+ transform: translateY(-10px);
+ }
+ 100% {
+ transform: translateY(0);
+ }
+}
+
+@keyframes spin {
+ from {
+ transform: rotate(0deg);
+ }
+ to {
+ transform: rotate(360deg);
+ }
+}
+
+@keyframes pulse {
+ 0%, 100% {
+ transform: scale(1);
+ }
+ 50% {
+ transform: scale(1.1);
+ }
+}
+
+@keyframes color {
+ 0% {
+ filter: hue-rotate(0deg);
+ }
+ 100% {
+ filter: hue-rotate(360deg);
+ }
+}
+
+/* Token counters styling */
#txt2img_token_counter, #txt2img_negative_token_counter {
display: flex;
flex-direction: row;
@@ -1063,7 +1171,6 @@ svg.feather.feather-image,
--input-radius: var(--radius-lg);
--input-text-size: var(--text-md);
--input-text-weight: 400;
- --loader-color: var(--color-accent);
--prose-text-size: var(--text-md);
--prose-text-weight: 400;
--prose-header-text-weight: 400;
From 507636d0a15385af8abfebf0c8145e2a5356bd0b Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 10:54:51 -0500
Subject: [PATCH 067/162] lora-refactor
Signed-off-by: Vladimir Mandic
---
modules/extra_networks.py | 5 +-
modules/lora/extra_networks_lora.py | 5 +-
modules/lora/lora_convert.py | 31 +++
modules/lora/lyco_helpers.py | 4 +-
modules/lora/networks.py | 364 +++++++++++++---------------
scripts/xyz_grid.py | 1 +
6 files changed, 208 insertions(+), 202 deletions(-)
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index 010157af9..fca48e21c 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -17,8 +17,9 @@ def register_extra_network(extra_network):
def register_default_extra_networks():
from modules.ui_extra_networks_styles import ExtraNetworkStyles
register_extra_network(ExtraNetworkStyles())
- from modules.lora.extra_networks_lora import ExtraNetworkLora
- register_extra_network(ExtraNetworkLora())
+ if shared.native:
+ from modules.lora.networks import extra_network_lora
+ register_extra_network(extra_network_lora)
if shared.opts.hypernetwork_enabled:
from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet
register_extra_network(ExtraNetworkHypernet())
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index c875ba0d5..d58cebd8f 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -4,6 +4,7 @@
import modules.lora.networks as networks
from modules import extra_networks, shared
+
# from https://github.com/cheald/sd-webui-loractl/blob/master/loractl/lib/utils.py
def get_stepwise(param, step, steps):
def sorted_positions(raw_steps):
@@ -122,8 +123,8 @@ def activate(self, p, params_list, step=0):
self.active = True
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
- networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims) # load
- networks.network_load() # backup/apply
+ networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
+ networks.network_process()
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
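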
diff --git a/modules/lora/lora_convert.py b/modules/lora/lora_convert.py
index dc86a24cf..032ffa5a3 100644
--- a/modules/lora/lora_convert.py
+++ b/modules/lora/lora_convert.py
@@ -476,3 +476,34 @@ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
return new_state_dict
return _convert_sd_scripts_to_ai_toolkit(state_dict)
+
+
+def assign_network_names_to_compvis_modules(sd_model):
+ if sd_model is None:
+ return
+ sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+ network_layer_mapping = {}
+ if hasattr(sd_model, 'text_encoder') and sd_model.text_encoder is not None:
+ for name, module in sd_model.text_encoder.named_modules():
+ prefix = "lora_te1_" if hasattr(sd_model, 'text_encoder_2') else "lora_te_"
+ network_name = prefix + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'text_encoder_2'):
+ for name, module in sd_model.text_encoder_2.named_modules():
+ network_name = "lora_te2_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'unet'):
+ for name, module in sd_model.unet.named_modules():
+ network_name = "lora_unet_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'transformer'):
+ for name, module in sd_model.transformer.named_modules():
+ network_name = "lora_transformer_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ if "norm" in network_name and "linear" not in network_name and shared.sd_model_type != "sd3":
+ continue
+ module.network_layer_name = network_name
+ shared.sd_model.network_layer_mapping = network_layer_mapping
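The relocated helper above derives LoRA layer keys from module paths: a component prefix plus the path with dots replaced by underscores. A small self-check of that convention (illustrative only):

    def to_network_name(prefix: str, module_path: str) -> str:
        # e.g. "down_blocks.0.attentions.0" -> "lora_unet_down_blocks_0_attentions_0"
        return prefix + module_path.replace('.', '_')

    assert to_network_name('lora_unet_', 'down_blocks.0.attentions.0') == 'lora_unet_down_blocks_0_attentions_0'
    assert to_network_name('lora_te2_', 'text_model.encoder.layers.0') == 'lora_te2_text_model_encoder_layers_0'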
diff --git a/modules/lora/lyco_helpers.py b/modules/lora/lyco_helpers.py
index 9a16d25ab..ac4f2419f 100644
--- a/modules/lora/lyco_helpers.py
+++ b/modules/lora/lyco_helpers.py
@@ -12,13 +12,13 @@ def rebuild_conventional(up, down, shape, dyn_dim=None):
if dyn_dim is not None:
up = up[:, :dyn_dim]
down = down[:dyn_dim, :]
- return (up @ down).reshape(shape)
+ return (up @ down).reshape(shape).to(up.dtype)
def rebuild_cp_decomposition(up, down, mid):
up = up.reshape(up.size(0), -1)
down = down.reshape(down.size(0), -1)
- return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)
+ return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down).to(up.dtype)
# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py
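The `.to(up.dtype)` casts added above keep the reconstructed delta in the dtype of the incoming factors even when the matmul is internally upcast. For reference, a minimal sketch of the low-rank reconstruction itself:

    import torch

    def lora_delta(up: torch.Tensor, down: torch.Tensor, shape) -> torch.Tensor:
        # up: (out, rank), down: (rank, in); the product is the dense weight delta
        return (up @ down).reshape(shape).to(up.dtype)

    weight = torch.randn(8, 16)
    up, down = torch.randn(8, 4), torch.randn(4, 16)
    patched = weight + 0.8 * lora_delta(up, down, weight.shape)  # multiplier-scaled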
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 83a62eb5a..e0f2134c9 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -19,16 +19,16 @@
import modules.lora.network_glora as network_glora
import modules.lora.network_overrides as network_overrides
import modules.lora.lora_convert as lora_convert
+from modules.lora.extra_networks_lora import ExtraNetworkLora
from modules import shared, devices, sd_models, sd_models_compile, errors, files_cache, model_quant
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
-extra_network_lora = None
+extra_network_lora = ExtraNetworkLora()
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'move': 0, 'restore': 0, 'deactivate': 0 }
-backup_size = 0
bnb = None
lora_cache = {}
diffuser_loaded = []
@@ -60,36 +60,7 @@ def get_timers():
return t
-def assign_network_names_to_compvis_modules(sd_model):
- if sd_model is None:
- return
- sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatiblility
- network_layer_mapping = {}
- if hasattr(sd_model, 'text_encoder') and sd_model.text_encoder is not None:
- for name, module in sd_model.text_encoder.named_modules():
- prefix = "lora_te1_" if hasattr(sd_model, 'text_encoder_2') else "lora_te_"
- network_name = prefix + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- module.network_layer_name = network_name
- if hasattr(sd_model, 'text_encoder_2'):
- for name, module in sd_model.text_encoder_2.named_modules():
- network_name = "lora_te2_" + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- module.network_layer_name = network_name
- if hasattr(sd_model, 'unet'):
- for name, module in sd_model.unet.named_modules():
- network_name = "lora_unet_" + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- module.network_layer_name = network_name
- if hasattr(sd_model, 'transformer'):
- for name, module in sd_model.transformer.named_modules():
- network_name = "lora_transformer_" + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- if "norm" in network_name and "linear" not in network_name and shared.sd_model_type != "sd3":
- continue
- module.network_layer_name = network_name
- shared.sd_model.network_layer_mapping = network_layer_mapping
-
+# section: load networks from disk
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
name = name.replace(".", "_")
@@ -120,7 +91,7 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
return net
-def load_network(name, network_on_disk) -> Union[network.Network, None]:
+def load_safetensors(name, network_on_disk) -> Union[network.Network, None]:
if not shared.sd_loaded:
return None
@@ -139,7 +110,7 @@ def load_network(name, network_on_disk) -> Union[network.Network, None]:
sd = lora_convert._convert_kohya_sd3_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
except ValueError: # EAFP for diffusers PEFT keys
pass
- assign_network_names_to_compvis_modules(shared.sd_model)
+ lora_convert.assign_network_names_to_compvis_modules(shared.sd_model)
keys_failed_to_match = {}
matched_networks = {}
bundle_embeddings = {}
@@ -217,9 +188,46 @@ def maybe_recompile_model(names, te_multipliers):
return recompile_model
-def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+def list_available_networks():
+ t0 = time.time()
+ available_networks.clear()
+ available_network_aliases.clear()
+ forbidden_network_aliases.clear()
+ available_network_hash_lookup.clear()
+ forbidden_network_aliases.update({"none": 1, "Addams": 1})
+ if not os.path.exists(shared.cmd_opts.lora_dir):
+ shared.log.warning(f'LoRA directory not found: path="{shared.cmd_opts.lora_dir}"')
+
+ def add_network(filename):
+ if not os.path.isfile(filename):
+ return
+ name = os.path.splitext(os.path.basename(filename))[0]
+ name = name.replace('.', '_')
+ try:
+ entry = network.NetworkOnDisk(name, filename)
+ available_networks[entry.name] = entry
+ if entry.alias in available_network_aliases:
+ forbidden_network_aliases[entry.alias.lower()] = 1
+ if shared.opts.lora_preferred_name == 'filename':
+ available_network_aliases[entry.name] = entry
+ else:
+ available_network_aliases[entry.alias] = entry
+ if entry.shorthash:
+ available_network_hash_lookup[entry.shorthash] = entry
+ except OSError as e: # should catch FileNotFoundError and PermissionError etc.
+ shared.log.error(f'LoRA: filename="{filename}" {e}')
+
+ candidates = list(files_cache.list_files(shared.cmd_opts.lora_dir, ext_filter=[".pt", ".ckpt", ".safetensors"]))
+ with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
+ for fn in candidates:
+ executor.submit(add_network, fn)
+ t1 = time.time()
+ timer['list'] = t1 - t0
+ shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
+
+
+def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
timer['list'] = 0
- global backup_size # pylint: disable=global-statement
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@@ -244,7 +252,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
if shared.opts.lora_force_diffusers or network_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRa loading
net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier)
else:
- net = load_network(name, network_on_disk)
+ net = load_safetensors(name, network_on_disk)
if net is not None:
net.mentioned_name = name
network_on_disk.read_hash()
@@ -294,17 +302,108 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
devices.torch_gc()
t1 = time.time()
- backup_size = 0
timer['load'] = t1 - t0
-def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+# section: process loaded networks
+
+def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name, wanted_names):
+ global bnb # pylint: disable=W0603
+ backup_size = 0
+ if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ t0 = time.time()
+ weights_backup = getattr(self, "network_weights_backup", None)
+ if weights_backup is None and wanted_names != (): # pylint: disable=C1803
+ self.network_weights_backup = None
+ if getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
+ if bnb is None:
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is not None:
+ with devices.inference_context():
+ weights_backup = bnb.functional.dequantize_4bit(weight, quant_state=weight.quant_state, quant_type=weight.quant_type, blocksize=weight.blocksize,)
+ self.quant_state = weight.quant_state
+ self.quant_type = weight.quant_type
+ self.blocksize = weight.blocksize
+ else:
+ weights_backup = weight.clone()
+ else:
+ weights_backup = weight.clone()
+ if shared.opts.lora_offload_backup and weights_backup is not None:
+ weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weights_backup
+ bias_backup = getattr(self, "network_bias_backup", None)
+ if bias_backup is None:
+ if getattr(self, 'bias', None) is not None:
+ bias_backup = self.bias.clone()
+ else:
+ bias_backup = None
+ if shared.opts.lora_offload_backup and bias_backup is not None:
+ bias_backup = bias_backup.to(devices.cpu)
+ self.network_bias_backup = bias_backup
+ if getattr(self, 'network_weights_backup', None) is not None:
+ backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
+ if getattr(self, 'network_bias_backup', None) is not None:
+ backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
+ t1 = time.time()
+ timer['backup'] += t1 - t0
+ return backup_size
+
+
+def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name):
+ if shared.opts.diffusers_offload_mode == "none":
+ self.to(devices.device, non_blocking=True)
+ batch_updown = None
+ batch_ex_bias = None
+ for net in loaded_networks:
+ module = net.modules.get(network_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ try:
+ t0 = time.time()
+ updown, ex_bias = module.calc_updown(weight)
+ t1 = time.time()
+ if batch_updown is not None and updown is not None:
+ batch_updown += updown
+ else:
+ batch_updown = updown
+ if batch_ex_bias is not None and ex_bias is not None:
+ batch_ex_bias += ex_bias
+ else:
+ batch_ex_bias = ex_bias
+ timer['calc'] += t1 - t0
+ if shared.opts.diffusers_offload_mode != "none":
+ t0 = time.time()
+ if batch_updown is not None:
+ batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
+ if batch_ex_bias is not None:
+ batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
+ if devices.backend == "ipex":
+ # using non_blocking=True here causes NaNs on Intel
+ torch.xpu.synchronize(devices.device)
+ t1 = time.time()
+ timer['move'] += t1 - t0
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
+ if module is None:
+ continue
+ shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ return batch_updown, batch_ex_bias
+
+
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
- return
+ return None, None
if weights_backup is not None:
+ self.weight = None
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
@@ -312,131 +411,28 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight.copy_(new_weight, non_blocking=True)
+ self.weight = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
- self.weight.copy_(weights_backup, non_blocking=True)
+ self.weight = torch.nn.Parameter(weights_backup, requires_grad=False)
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
if bias_backup is not None:
+ self.bias = None
if ex_bias is not None:
new_weight = ex_bias.to(devices.device, non_blocking=True) + bias_backup.to(devices.device, non_blocking=True)
- self.bias.copy_(new_weight, non_blocking=True)
+ self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
- self.bias.copy_(bias_backup, non_blocking=True)
+ self.bias = torch.nn.Parameter(bias_backup, requires_grad=False)
else:
self.bias = None
t1 = time.time()
timer['apply'] += t1 - t0
+ return self.weight.device, self.weight.dtype
-def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
- global bnb, backup_size # pylint: disable=W0603
- t0 = time.time()
- weights_backup = getattr(self, "network_weights_backup", None)
- if weights_backup is None and wanted_names != (): # pylint: disable=C1803
- if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
- if bnb is None:
- bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
- if bnb is not None:
- with devices.inference_context():
- weights_backup = bnb.functional.dequantize_4bit(self.weight, quant_state=self.weight.quant_state, quant_type=self.weight.quant_type, blocksize=self.weight.blocksize,)
- self.quant_state = self.weight.quant_state
- self.quant_type = self.weight.quant_type
- self.blocksize = self.weight.blocksize
- else:
- weights_backup = self.weight.clone()
- else:
- weights_backup = self.weight.clone()
- if shared.opts.lora_offload_backup and weights_backup is not None:
- weights_backup = weights_backup.to(devices.cpu)
- self.network_weights_backup = weights_backup
- bias_backup = getattr(self, "network_bias_backup", None)
- if bias_backup is None:
- if getattr(self, 'bias', None) is not None:
- bias_backup = self.bias.clone()
- else:
- bias_backup = None
- if shared.opts.lora_offload_backup and bias_backup is not None:
- bias_backup = bias_backup.to(devices.cpu)
- self.network_bias_backup = bias_backup
- if getattr(self, 'network_weights_backup', None) is not None:
- backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
- if getattr(self, 'network_bias_backup', None) is not None:
- backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
- t1 = time.time()
- timer['backup'] += t1 - t0
-
-
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv]):
- """
- Applies the currently selected set of networks to the weights of torch layer self.
- If weights already have this particular set of networks applied, does nothing.
- If not, restores orginal weights from backup and alters weights according to networks.
- """
- network_layer_name = getattr(self, 'network_layer_name', None)
- current_names = getattr(self, "network_current_names", ())
- wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
- with devices.inference_context():
- if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
- maybe_backup_weights(self, wanted_names)
- if current_names != wanted_names:
- if shared.opts.diffusers_offload_mode == "none":
- self.to(devices.device, non_blocking=True)
- batch_updown = None
- batch_ex_bias = None
- for net in loaded_networks:
- module = net.modules.get(network_layer_name, None)
- if module is not None and hasattr(self, 'weight'):
- try:
- t0 = time.time()
- weight = self.weight.to(devices.device, non_blocking=True) # calculate quant weights once
- t1 = time.time()
- updown, ex_bias = module.calc_updown(weight)
- del weight
- t2 = time.time()
- timer['move'] += t1 - t0
- timer['calc'] += t2 - t1
- if batch_updown is not None and updown is not None:
- batch_updown += updown
- else:
- batch_updown = updown
- if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias
- else:
- batch_ex_bias = ex_bias
- if shared.opts.diffusers_offload_mode != "none":
- t0 = time.time()
- if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
- if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
- if devices.backend == "ipex":
- # using non_blocking=True here causes NaNs on Intel
- torch.xpu.synchronize(devices.device)
- t1 = time.time()
- timer['move'] += t1 - t0
- except RuntimeError as e:
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if debug:
- module_name = net.modules.get(network_layer_name, None)
- shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
- errors.display(e, 'LoRA')
- raise RuntimeError('LoRA apply weight') from e
- continue
- if module is None:
- continue
- shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- self.network_current_names = wanted_names
- set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
- if batch_updown is not None or batch_ex_bias is not None:
- return self.weight.device
- return None
-
-
-def network_load():
+def network_process():
timer['backup'] = 0
timer['calc'] = 0
timer['apply'] = 0
@@ -450,63 +446,39 @@ def network_load():
component = getattr(sd_model, component_name, None)
if component is not None and hasattr(component, 'named_modules'):
modules += list(component.named_modules())
- devices_used = []
if len(loaded_networks) > 0:
pbar = rp.Progress(rp.TextColumn('[cyan]{task.description}'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), console=shared.console)
task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules))
else:
task = None
pbar = nullcontext()
- with pbar:
+ with devices.inference_context(), pbar:
+ wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
+ applied = 0
+ backup_size = 0
+ weights_devices = []
+ weights_dtypes = []
for _, module in modules:
- if shared.state.interrupted:
+ network_layer_name = getattr(module, 'network_layer_name', None)
+ current_names = getattr(module, "network_current_names", ())
+ if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
continue
- devices_used.append(network_apply_weights(module))
+ weight = module.weight.to(devices.device, non_blocking=True) if hasattr(module, 'weight') else None
+ backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
+ batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
+ del weight
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias)
+ weights_devices.append(weights_device)
+ weights_dtypes.append(weights_dtype)
+ module.network_current_names = wanted_names
if task is not None:
pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
# pbar.remove_task(task)
- if debug:
- devices_used = [d for d in devices_used if d is not None]
- devices_set = list(set(devices_used))
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)} apply={len(devices_used)} device={devices_set} backup={backup_size} time={get_timers()}')
+ weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403
+ if debug and len(loaded_networks) > 0:
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)} networks={len(loaded_networks)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
-
-
-def list_available_networks():
- t0 = time.time()
- available_networks.clear()
- available_network_aliases.clear()
- forbidden_network_aliases.clear()
- available_network_hash_lookup.clear()
- forbidden_network_aliases.update({"none": 1, "Addams": 1})
- if not os.path.exists(shared.cmd_opts.lora_dir):
- shared.log.warning(f'LoRA directory not found: path="{shared.cmd_opts.lora_dir}"')
-
- def add_network(filename):
- if not os.path.isfile(filename):
- return
- name = os.path.splitext(os.path.basename(filename))[0]
- name = name.replace('.', '_')
- try:
- entry = network.NetworkOnDisk(name, filename)
- available_networks[entry.name] = entry
- if entry.alias in available_network_aliases:
- forbidden_network_aliases[entry.alias.lower()] = 1
- if shared.opts.lora_preferred_name == 'filename':
- available_network_aliases[entry.name] = entry
- else:
- available_network_aliases[entry.alias] = entry
- if entry.shorthash:
- available_network_hash_lookup[entry.shorthash] = entry
- except OSError as e: # should catch FileNotFoundError and PermissionError etc.
- shared.log.error(f'LoRA: filename="{filename}" {e}')
-
- candidates = list(files_cache.list_files(shared.cmd_opts.lora_dir, ext_filter=[".pt", ".ckpt", ".safetensors"]))
- with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
- for fn in candidates:
- executor.submit(add_network, fn)
- t1 = time.time()
- timer['list'] = t1 - t0
- shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
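Taken together, the networks.py changes above replace the single monolithic apply with a per-module pipeline: back up the original weights once, sum the deltas from all loaded networks, write the patched weight, and record which network set the module now carries. A reduced, self-contained sketch of that flow (helper names and bodies are illustrative, not the actual functions):

    import torch

    def backup_weights(module):
        if getattr(module, 'weights_backup', None) is None:
            module.weights_backup = module.weight.detach().clone()

    def calc_delta(module):
        # stand-in for summing calc_updown() over every loaded network
        return torch.zeros_like(module.weight), None

    def apply_delta(module, updown, ex_bias):
        base = module.weights_backup
        new_weight = base + updown if updown is not None else base
        module.weight = torch.nn.Parameter(new_weight, requires_grad=False)

    def process_module(module, wanted_names) -> bool:
        if getattr(module, 'network_current_names', ()) == wanted_names:
            return False                      # already carries this LoRA set
        backup_weights(module)                # clone original weights once
        updown, ex_bias = calc_delta(module)  # combined delta from loaded networks
        apply_delta(module, updown, ex_bias)  # patched weight, or restored backup
        module.network_current_names = wanted_names
        return True

    process_module(torch.nn.Linear(16, 8), (('some_lora', 1.0, 1.0, None),))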
diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 60e608c76..0395ce736 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -12,6 +12,7 @@
from scripts.xyz_grid_shared import str_permutations, list_to_csv_string, re_range # pylint: disable=no-name-in-module
from scripts.xyz_grid_classes import axis_options, AxisOption, SharedSettingsStackHelper # pylint: disable=no-name-in-module
from scripts.xyz_grid_draw import draw_xyz_grid # pylint: disable=no-name-in-module
+from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
from modules import shared, errors, scripts, images, processing
from modules.ui_components import ToolButton
import modules.ui_symbols as symbols
From 023b13b6cb7a2d1bfaed0a55022e29bf2efd2d13 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 15:34:25 -0500
Subject: [PATCH 068/162] balanced offload improvements
Signed-off-by: Vladimir Mandic
---
modules/call_queue.py | 20 +++++++++-------
modules/devices.py | 4 ++--
modules/lora/networks.py | 11 +++++----
modules/memstats.py | 18 +++++++++++---
modules/processing_diffusers.py | 2 ++
modules/prompt_parser_diffusers.py | 38 +++++++++++++++++-------------
modules/sd_models.py | 20 +++++++---------
modules/shared.py | 2 +-
modules/ui_control.py | 19 +++++++++------
9 files changed, 80 insertions(+), 54 deletions(-)
diff --git a/modules/call_queue.py b/modules/call_queue.py
index cdc2fe1f7..11ba7b56e 100644
--- a/modules/call_queue.py
+++ b/modules/call_queue.py
@@ -73,16 +73,20 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs):
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"{elapsed_m}m {elapsed_s:.2f}s" if elapsed_m > 0 else f"{elapsed_s:.2f}s"
- summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
- vram_html = ''
+ summary = timer.process.summary(min_time=0.25, total=False).replace('=', ' ')
+ gpu = ''
+ cpu = ''
if not shared.mem_mon.disabled:
vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
- used = round(100 * vram['used'] / (vram['total'] + 0.001))
- if vram.get('active_peak', 0) > 0:
- vram_html = " | "
- vram_html += f"GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
- vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ peak = max(vram['active_peak'], vram['reserved_peak'], vram['used'])
+ used = round(100.0 * peak / vram['total']) if vram['total'] > 0 else 0
+ if used > 0:
+ gpu += f"| GPU {peak} MB {used}%"
+ gpu += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ ram = shared.ram_stats()
+ if ram['used'] > 0:
+ cpu += f"| RAM {ram['used']} GB {round(100.0 * ram['used'] / ram['total'])}%"
if isinstance(res, list):
- res[-1] += f""
+ res[-1] += f""
return tuple(res)
return f
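The stats string above now reports the worst of the peak counters (active peak, reserved peak, current used) instead of instantaneous usage, guards the percentage against a zero total, and adds a RAM line from ram_stats(). The GPU part of that logic in isolation (illustrative):

    def gpu_summary(vram: dict) -> str:
        peak = max(vram.get('active_peak', 0), vram.get('reserved_peak', 0), vram.get('used', 0))
        used = round(100.0 * peak / vram['total']) if vram.get('total', 0) > 0 else 0
        return f'GPU {peak} MB {used}%' if used > 0 else ''

    print(gpu_summary({'active_peak': 6144, 'reserved_peak': 7000, 'used': 5000, 'total': 8192}))  # GPU 7000 MB 85%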
diff --git a/modules/devices.py b/modules/devices.py
index 64968a30c..71eef5726 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -224,7 +224,7 @@ def torch_gc(force=False, fast=False):
timer.process.records['gc'] = 0
timer.process.records['gc'] += t1 - t0
if not force or collected == 0:
- return used_gpu
+ return used_gpu, used_ram
mem = memstats.memory_stats()
saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2)
before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) }
@@ -233,7 +233,7 @@ def torch_gc(force=False, fast=False):
results = { 'collected': collected, 'saved': saved }
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}') # pylint: disable=protected-access
- return used_gpu
+ return used_gpu, used_ram
def set_cuda_sync_mode(mode):
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index e0f2134c9..21d641af6 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -447,8 +447,8 @@ def network_process():
if component is not None and hasattr(component, 'named_modules'):
modules += list(component.named_modules())
if len(loaded_networks) > 0:
- pbar = rp.Progress(rp.TextColumn('[cyan]{task.description}'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), console=shared.console)
- task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules))
+ pbar = rp.Progress(rp.TextColumn('[cyan]Apply network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
+ task = pbar.add_task(description='' , total=len(modules))
else:
task = None
pbar = nullcontext()
@@ -463,7 +463,8 @@ def network_process():
current_names = getattr(module, "network_current_names", ())
if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
continue
- weight = module.weight.to(devices.device, non_blocking=True) if hasattr(module, 'weight') else None
+ weight = getattr(module, 'weight', None)
+ weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
del weight
@@ -472,13 +473,13 @@ def network_process():
weights_dtypes.append(weights_dtype)
module.network_current_names = wanted_names
if task is not None:
- pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
if batch_updown is not None or batch_ex_bias is not None:
applied += 1
# pbar.remove_task(task)
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)} networks={len(loaded_networks)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
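The progress-bar change above moves the static label into a fixed text column and refreshes the task description on every step, so the bar shows live counts instead of a constant string. A standalone rich example of the same idea (not the SD.Next wiring):

    import time
    from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn

    columns = [TextColumn('[cyan]Apply network: type=LoRA'), BarColumn(), TaskProgressColumn(), TextColumn('[cyan]{task.description}')]
    with Progress(*columns) as pbar:
        task = pbar.add_task(description='', total=20)
        for step in range(20):
            time.sleep(0.05)  # stand-in for per-module work
            pbar.update(task, advance=1, description=f'modules=20 apply={step + 1}')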
diff --git a/modules/memstats.py b/modules/memstats.py
index c417165a2..7836f7636 100644
--- a/modules/memstats.py
+++ b/modules/memstats.py
@@ -5,11 +5,12 @@
fail_once = False
+def gb(val: float):
+ return round(val / 1024 / 1024 / 1024, 2)
+
+
def memory_stats():
global fail_once # pylint: disable=global-statement
- def gb(val: float):
- return round(val / 1024 / 1024 / 1024, 2)
-
mem = {}
try:
process = psutil.Process(os.getpid())
@@ -38,3 +39,14 @@ def gb(val: float):
except Exception:
pass
return mem
+
+
+def ram_stats():
+ try:
+ process = psutil.Process(os.getpid())
+ res = process.memory_info()
+ ram_total = 100 * res.rss / process.memory_percent()
+ ram = { 'used': gb(res.rss), 'total': gb(ram_total) }
+ return ram
+ except Exception:
+ return { 'used': 0, 'total': 0 }
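ram_stats() above derives the machine total from the process's own share: if the process resident set is rss bytes and that equals memory_percent() of the system, the total is roughly rss / (memory_percent / 100). A standalone sketch (the committed code additionally wraps this in try/except and rounds to GB):

    import os
    import psutil

    proc = psutil.Process(os.getpid())
    rss = proc.memory_info().rss               # resident set size in bytes
    total = 100 * rss / proc.memory_percent()  # implied total system RAM
    print(f'used={rss / 1024**3:.2f} GB total={total / 1024**3:.2f} GB')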
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 463a15280..c605a761c 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -83,6 +83,8 @@ def process_base(p: processing.StableDiffusionProcessing):
try:
t0 = time.time()
sd_models_compile.check_deepcache(enable=True)
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
sd_models.move_model(shared.sd_model, devices.device)
if hasattr(shared.sd_model, 'unet'):
sd_models.move_model(shared.sd_model.unet, devices.device)
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index c74731c6d..d2093351a 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -39,8 +39,8 @@ def prepare_model(pipe = None):
pipe = pipe.pipe
if not hasattr(pipe, "text_encoder"):
return None
- if shared.opts.diffusers_offload_mode == "balanced":
- pipe = sd_models.apply_balanced_offload(pipe)
+ # if shared.opts.diffusers_offload_mode == "balanced":
+ # pipe = sd_models.apply_balanced_offload(pipe)
elif hasattr(pipe, "maybe_free_model_hooks"):
pipe.maybe_free_model_hooks()
devices.torch_gc()
@@ -79,8 +79,8 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.scheduled_encode(pipe, batchidx)
else:
self.encode(pipe, prompt, negative_prompt, batchidx)
- if shared.opts.diffusers_offload_mode == "balanced":
- pipe = sd_models.apply_balanced_offload(pipe)
+ # if shared.opts.diffusers_offload_mode == "balanced":
+ # pipe = sd_models.apply_balanced_offload(pipe)
self.checkcache(p)
debug(f"Prompt encode: time={(time.time() - t0):.3f}")
@@ -199,8 +199,6 @@ def __call__(self, key, step=0):
def compel_hijack(self, token_ids: torch.Tensor, attention_mask: typing.Optional[torch.Tensor] = None) -> torch.Tensor:
- if not devices.same_device(self.text_encoder.device, devices.device):
- sd_models.move_model(self.text_encoder, devices.device)
needs_hidden_states = self.returned_embeddings_type != 1
text_encoder_output = self.text_encoder(token_ids, attention_mask, output_hidden_states=needs_hidden_states, return_dict=True)
@@ -377,25 +375,31 @@ def prepare_embedding_providers(pipe, clip_skip) -> list[EmbeddingsProvider]:
embedding_type = -(clip_skip + 1)
else:
embedding_type = clip_skip
+ embedding_args = {
+ 'truncate': False,
+ 'returned_embeddings_type': embedding_type,
+ 'device': device,
+ 'dtype_for_device_getter': lambda device: devices.dtype,
+ }
if getattr(pipe, "prior_pipe", None) is not None and getattr(pipe.prior_pipe, "tokenizer", None) is not None and getattr(pipe.prior_pipe, "text_encoder", None) is not None:
- provider = EmbeddingsProvider(padding_attention_mask_value=0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ provider = EmbeddingsProvider(padding_attention_mask_value=0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, **embedding_args)
embeddings_providers.append(provider)
- no_mask_provider = EmbeddingsProvider(padding_attention_mask_value=1 if "sote" in pipe.sd_checkpoint_info.name.lower() else 0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ no_mask_provider = EmbeddingsProvider(padding_attention_mask_value=1 if "sote" in pipe.sd_checkpoint_info.name.lower() else 0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, **embedding_args)
embeddings_providers.append(no_mask_provider)
elif getattr(pipe, "tokenizer", None) is not None and getattr(pipe, "text_encoder", None) is not None:
- if not devices.same_device(pipe.text_encoder.device, devices.device):
- sd_models.move_model(pipe.text_encoder, devices.device)
- provider = EmbeddingsProvider(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ if pipe.text_encoder.__class__.__name__.startswith('CLIP'):
+ sd_models.move_model(pipe.text_encoder, devices.device, force=True)
+ provider = EmbeddingsProvider(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, **embedding_args)
embeddings_providers.append(provider)
if getattr(pipe, "tokenizer_2", None) is not None and getattr(pipe, "text_encoder_2", None) is not None:
- if not devices.same_device(pipe.text_encoder_2.device, devices.device):
- sd_models.move_model(pipe.text_encoder_2, devices.device)
- provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_2, text_encoder=pipe.text_encoder_2, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ if pipe.text_encoder_2.__class__.__name__.startswith('CLIP'):
+ sd_models.move_model(pipe.text_encoder_2, devices.device, force=True)
+ provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_2, text_encoder=pipe.text_encoder_2, **embedding_args)
embeddings_providers.append(provider)
if getattr(pipe, "tokenizer_3", None) is not None and getattr(pipe, "text_encoder_3", None) is not None:
- if not devices.same_device(pipe.text_encoder_3.device, devices.device):
- sd_models.move_model(pipe.text_encoder_3, devices.device)
- provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_3, text_encoder=pipe.text_encoder_3, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ if pipe.text_encoder_3.__class__.__name__.startswith('CLIP'):
+ sd_models.move_model(pipe.text_encoder_3, devices.device, force=True)
+ provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_3, text_encoder=pipe.text_encoder_3, **embedding_args)
embeddings_providers.append(provider)
return embeddings_providers
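The provider changes above hoist the keyword arguments shared by every EmbeddingsProvider into a single embedding_args dict and splat it at each call site, so a new common argument (such as dtype_for_device_getter) only has to be added once. A generic sketch of the pattern with stand-in objects:

    class Provider:
        def __init__(self, tokenizer, text_encoder, truncate=True, returned_embeddings_type=1, device='cpu'):
            self.tokenizer, self.text_encoder = tokenizer, text_encoder
            self.truncate, self.embeddings_type, self.device = truncate, returned_embeddings_type, device

    embedding_args = {'truncate': False, 'returned_embeddings_type': -2, 'device': 'cuda'}
    providers = [Provider('tok1', 'te1', **embedding_args), Provider('tok2', 'te2', **embedding_args)]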
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 24ceff5ee..83bf6f994 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -401,7 +401,6 @@ def pre_forward(self, module, *args, **kwargs):
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
- module.balanced_offload_active = True
return args, kwargs
def post_forward(self, module, output):
@@ -429,7 +428,7 @@ def apply_balanced_offload(sd_model):
checkpoint_name = sd_model.__class__.__name__
def apply_balanced_offload_to_module(pipe):
- used_gpu = devices.torch_gc(fast=True)
+ used_gpu, used_ram = devices.torch_gc(fast=True)
if hasattr(pipe, "pipe"):
apply_balanced_offload_to_module(pipe.pipe)
if hasattr(pipe, "_internal_dict"):
@@ -438,20 +437,21 @@ def apply_balanced_offload_to_module(pipe):
keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- balanced_offload_active = getattr(module, "balanced_offload_active", None)
- if isinstance(module, torch.nn.Module) and (balanced_offload_active is None or balanced_offload_active):
+ if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
if used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory:
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
module = module.to(devices.cpu, non_blocking=True)
- used_gpu = devices.torch_gc(fast=True)
+ used_gpu, used_ram = devices.torch_gc(fast=True)
+ else:
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- module.balanced_offload_active = False
if network_layer_name:
module.network_layer_name = network_layer_name
if device_map and max_memory:
@@ -515,13 +515,13 @@ def move_model(model, device=None, force=False):
shared.log.error(f'Model move execution device: device={device} {e}')
if getattr(model, 'has_accelerate', False) and not force:
return
- if hasattr(model, "device") and devices.normalize_device(model.device) == devices.normalize_device(device):
+ if hasattr(model, "device") and devices.normalize_device(model.device) == devices.normalize_device(device) and not force:
return
try:
t0 = time.time()
try:
if hasattr(model, 'to'):
- model.to(device)
+ model.to(device, non_blocking=True)
if hasattr(model, "prior_pipe"):
model.prior_pipe.to(device)
except Exception as e0:
@@ -551,7 +551,7 @@ def move_model(model, device=None, force=False):
if 'move' not in process_timer.records:
process_timer.records['move'] = 0
process_timer.records['move'] += t1 - t0
- if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 1:
+ if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 2:
shared.log.debug(f'Model move: device={device} class={model.__class__.__name__} accelerate={getattr(model, "has_accelerate", False)} fn={fn} time={t1-t0:.2f}') # pylint: disable=protected-access
devices.torch_gc()
@@ -1492,8 +1492,6 @@ def disable_offload(sd_model):
module = getattr(sd_model, module_name, None)
if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
- if getattr(module, "balanced_offload_active", None) is not None:
- module.balanced_offload_active = None
module = remove_hook_from_module(module, recurse=True)
if network_layer_name:
module.network_layer_name = network_layer_name
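The restructured block above always re-attaches the offload hook and only conditionally moves a component to CPU, based on a GPU usage watermark (the 100 * multiplier suggests used_gpu is a percentage while the option is stored as a 0..1 fraction). A reduced sketch of just that decision, with illustrative names:

    import torch

    def maybe_offload(module: torch.nn.Module, used_gpu_pct: float, min_gpu_watermark: float) -> torch.nn.Module:
        # offload only when measured GPU usage exceeds the configured low watermark
        if used_gpu_pct > 100 * min_gpu_watermark:
            module = module.to('cpu')
        return module

    unet = maybe_offload(torch.nn.Linear(4, 4), used_gpu_pct=82.0, min_gpu_watermark=0.25)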
diff --git a/modules/shared.py b/modules/shared.py
index 21a70fea1..4b7f34e83 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -20,7 +20,7 @@
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
-from modules.memstats import memory_stats
+from modules.memstats import memory_stats, ram_stats
from modules.ui_components import DropdownEditable
import modules.interrogate
import modules.memmon
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 072d9b9c9..59db12fc5 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -29,15 +29,20 @@ def return_stats(t: float = None):
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"Time: {elapsed_m}m {elapsed_s:.2f}s |" if elapsed_m > 0 else f"Time: {elapsed_s:.2f}s |"
- summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
- vram_html = ''
+ summary = timer.process.summary(min_time=0.25, total=False).replace('=', ' ')
+ gpu = ''
+ cpu = ''
if not shared.mem_mon.disabled:
vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
- used = round(100 * vram['used'] / (vram['total'] + 0.001))
- if vram.get('active_peak', 0) > 0:
- vram_html += f"| GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
- vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
- return f""
+ peak = max(vram['active_peak'], vram['reserved_peak'], vram['used'])
+ used = round(100.0 * peak / vram['total']) if vram['total'] > 0 else 0
+ if used > 0:
+ gpu += f"| GPU {peak} MB {used}%"
+ gpu += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ ram = shared.ram_stats()
+ if ram['used'] > 0:
+ cpu += f"| RAM {ram['used']} GB {round(100.0 * ram['used'] / ram['total'])}%"
+ return f""
def return_controls(res, t: float = None):
From 82eb9244865e50526002bd7cf952fe1cabbb422a Mon Sep 17 00:00:00 2001
From: Disty0
Date: Mon, 2 Dec 2024 00:29:01 +0300
Subject: [PATCH 069/162] Reduce balanced offload max gpu memory to 0.70
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/shared.py b/modules/shared.py
index 4b7f34e83..387b6ca40 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -561,7 +561,7 @@ def get_default_modes():
"diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_gpu_memory": OptionInfo(0.75, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.75, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
"diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
From 106f93f07963667de96399cfccc1d5e396e95c41 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 17:13:53 -0500
Subject: [PATCH 070/162] memory optimizations
Signed-off-by: Vladimir Mandic
---
modules/processing.py | 1 +
modules/processing_args.py | 24 ++++++++++++++++--------
modules/sd_models.py | 29 +++++++++++++++--------------
3 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/modules/processing.py b/modules/processing.py
index ebbaf7272..095eba54c 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -483,4 +483,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
for stat in stats[:20]:
frame = stat.traceback[0]
shared.log.debug(f' file="{frame.filename}":{frame.lineno} size={stat.size}')
+ devices.torch_gc(force=True)
return processed
diff --git a/modules/processing_args.py b/modules/processing_args.py
index a716b685e..4ce552825 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -12,7 +12,8 @@
from modules.api import helpers
-debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
+debug_enabled = os.environ.get('SD_DIFFUSERS_DEBUG', None)
+debug_log = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
def task_specific_kwargs(p, model):
@@ -93,7 +94,8 @@ def task_specific_kwargs(p, model):
'target_subject_category': getattr(p, 'prompt', '').split()[-1],
'output_type': 'pil',
}
- debug(f'Diffusers task specific args: {task_args}')
+ if debug_enabled:
+ debug_log(f'Diffusers task specific args: {task_args}')
return task_args
@@ -108,7 +110,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
signature = inspect.signature(type(model).__call__, follow_wrapped=True)
possible = list(signature.parameters)
- debug(f'Diffusers pipeline possible: {possible}')
+ if debug_enabled:
+ debug_log(f'Diffusers pipeline possible: {possible}')
prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
steps = kwargs.get("num_inference_steps", None) or len(getattr(p, 'timesteps', ['1']))
clip_skip = kwargs.pop("clip_skip", 1)
@@ -159,6 +162,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
args['negative_prompt'] = negative_prompts[0]
else:
args['negative_prompt'] = negative_prompts
+ if prompt_parser_diffusers.embedder is not None and not prompt_parser_diffusers.embedder.scheduled_prompt: # not scheduled so we dont need it anymore
+ prompt_parser_diffusers.embedder = None
if 'clip_skip' in possible and parser == 'fixed':
if clip_skip == 1:
@@ -248,14 +253,16 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
if arg in possible:
args[arg] = task_kwargs[arg]
task_args = getattr(p, 'task_args', {})
- debug(f'Diffusers task args: {task_args}')
+ if debug_enabled:
+ debug_log(f'Diffusers task args: {task_args}')
for k, v in task_args.items():
if k in possible:
args[k] = v
else:
- debug(f'Diffusers unknown task args: {k}={v}')
+ debug_log(f'Diffusers unknown task args: {k}={v}')
cross_attention_args = getattr(p, 'cross_attention_kwargs', {})
- debug(f'Diffusers cross-attention args: {cross_attention_args}')
+ if debug_enabled:
+ debug_log(f'Diffusers cross-attention args: {cross_attention_args}')
for k, v in cross_attention_args.items():
if args.get('cross_attention_kwargs', None) is None:
args['cross_attention_kwargs'] = {}
@@ -273,7 +280,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
# handle implicit controlnet
if 'control_image' in possible and 'control_image' not in args and 'image' in args:
- debug('Diffusers: set control image')
+ debug_log('Diffusers: set control image')
args['control_image'] = args['image']
sd_hijack_hypertile.hypertile_set(p, hr=len(getattr(p, 'init_images', [])) > 0)
@@ -309,5 +316,6 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
if shared.cmd_opts.profile:
t1 = time.time()
shared.log.debug(f'Profile: pipeline args: {t1-t0:.2f}')
- debug(f'Diffusers pipeline args: {args}')
+ if debug_enabled:
+ debug_log(f'Diffusers pipeline args: {args}')
return args
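The logging changes above avoid paying for f-string construction when tracing is disabled: even when debug_log is a no-op lambda, its argument would still be formatted, so calls are additionally guarded by a flag resolved once from the environment. A small sketch of the pattern:

    import os

    debug_enabled = os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None
    debug_log = print if debug_enabled else (lambda *args, **kwargs: None)

    args = {'prompt': 'x' * 100_000}
    if debug_enabled:  # the f-string below is never built when debugging is off
        debug_log(f'Diffusers pipeline args: {args}')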
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 83bf6f994..37567962c 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -443,23 +443,24 @@ def apply_balanced_offload_to_module(pipe):
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
- if used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory:
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
- module = module.to(devices.cpu, non_blocking=True)
- used_gpu, used_ram = devices.torch_gc(fast=True)
- else:
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
- module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
- module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
- module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- if network_layer_name:
- module.network_layer_name = network_layer_name
- if device_map and max_memory:
- module.balanced_offload_device_map = device_map
- module.balanced_offload_max_memory = max_memory
+ do_offload = used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__}')
+ if do_offload:
+ module = module.to(devices.cpu)
+ used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
+ if os.environ.get('SD_MOVE_DEBUG', None):
+ errors.display(e, f'Balanced offload: module={module_name}')
+ module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
+ module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ if network_layer_name:
+ module.network_layer_name = network_layer_name
+ if device_map and max_memory:
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
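The refactored hunk above makes the offload decision explicit: a module is moved to CPU only when current GPU usage exceeds the configured minimum, and in every case the offload hook is re-attached afterwards. A hedged sketch of just the threshold check, assuming used_gpu is a percentage as returned by devices.torch_gc and the option is a 0..1 fraction (names and numbers are illustrative):

def should_offload(used_gpu: float, min_gpu_memory: float) -> bool:
    # diffusers_offload_min_gpu_memory is a fraction, hence the factor of 100
    return used_gpu > 100 * min_gpu_memory

print(should_offload(used_gpu=35.0, min_gpu_memory=0.2))  # True  -> module.to(cpu)
print(should_offload(used_gpu=10.0, min_gpu_memory=0.2))  # False -> keep on device, re-hook only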
From f3761032cfecc59305b927af69bbf2699bb8f17e Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 17:26:34 -0500
Subject: [PATCH 071/162] cleanup
Signed-off-by: Vladimir Mandic
---
modules/sd_checkpoint.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index 2f6533ef0..6ab396329 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -253,6 +253,8 @@ def select_checkpoint(op='model'):
model_checkpoint = shared.opts.data.get('sd_model_refiner', None)
else:
model_checkpoint = shared.opts.sd_model_checkpoint
+    if model_checkpoint is None or len(model_checkpoint) < 3:
+        return None
if model_checkpoint is None or model_checkpoint == 'None':
return None
checkpoint_info = get_closet_checkpoint_match(model_checkpoint)
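The added guard treats very short checkpoint names the same as an unset selection; a tiny standalone illustration of the intended behaviour (the helper name is hypothetical, not part of the patch):

def is_valid_checkpoint_name(name) -> bool:
    # hypothetical helper mirroring the select_checkpoint guard above
    if name is None or name == 'None':
        return False
    return len(name) >= 3  # names shorter than 3 characters are treated as unset

for candidate in (None, 'None', '', 'ab', 'sdxl-base.safetensors'):
    print(candidate, is_valid_checkpoint_name(candidate))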
From 7e2034c4ffdb8c61fab47f36568a04a1391cba42 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 10:51:41 -0500
Subject: [PATCH 072/162] lora add fuse
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 5 +-
extensions-builtin/Lora/network_overrides.py | 1 -
modules/lora/extra_networks_lora.py | 3 +-
modules/lora/networks.py | 61 ++++++++++++++------
modules/shared.py | 10 ++--
scripts/xyz_grid.py | 2 +-
6 files changed, 54 insertions(+), 28 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c62b6b917..a9109341d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-30
+## Update for 2024-12-02
### New models and integrations
@@ -32,6 +32,9 @@
### UI and workflow improvements
+- **LoRA** handler rewrite
+ - LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
+    this adds perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
diff --git a/extensions-builtin/Lora/network_overrides.py b/extensions-builtin/Lora/network_overrides.py
index 5334f3c1b..b5c28b718 100644
--- a/extensions-builtin/Lora/network_overrides.py
+++ b/extensions-builtin/Lora/network_overrides.py
@@ -26,7 +26,6 @@
force_models = [ # forced always
'sc',
- # 'sd3',
'kandinsky',
'hunyuandit',
'auraflow',
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index d58cebd8f..57966550a 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -124,7 +124,7 @@ def activate(self, p, params_list, step=0):
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
- networks.network_process()
+ networks.network_activate()
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
@@ -141,6 +141,7 @@ def deactivate(self, p):
shared.sd_model.unload_lora_weights() # fails for non-CLIP models
except Exception:
pass
+ networks.network_deactivate()
t1 = time.time()
networks.timer['restore'] += t1 - t0
if self.active and networks.debug:
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 21d641af6..48073774c 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -310,12 +310,15 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name, wanted_names):
global bnb # pylint: disable=W0603
backup_size = 0
- if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
t0 = time.time()
+
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
self.network_weights_backup = None
- if getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
+ if shared.opts.lora_fuse_diffusers:
+ weights_backup = True
+ elif getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
if bnb is None:
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
if bnb is not None:
@@ -328,22 +331,26 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
weights_backup = weight.clone()
else:
weights_backup = weight.clone()
- if shared.opts.lora_offload_backup and weights_backup is not None:
+ if shared.opts.lora_offload_backup and weights_backup is not None and isinstance(weights_backup, torch.Tensor):
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
- bias_backup = self.bias.clone()
+ if shared.opts.lora_fuse_diffusers:
+ bias_backup = True
+ else:
+ bias_backup = self.bias.clone()
else:
bias_backup = None
- if shared.opts.lora_offload_backup and bias_backup is not None:
+ if shared.opts.lora_offload_backup and bias_backup is not None and isinstance(bias_backup, torch.Tensor):
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
+
if getattr(self, 'network_weights_backup', None) is not None:
- backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
+ backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size() if isinstance(self.network_weights_backup, torch.Tensor) else 0
if getattr(self, 'network_bias_backup', None) is not None:
- backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
+ backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size() if isinstance(self.network_bias_backup, torch.Tensor) else 0
t1 = time.time()
timer['backup'] += t1 - t0
return backup_size
@@ -396,18 +403,24 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias, apply: bool = True):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return None, None
if weights_backup is not None:
- self.weight = None
+ if isinstance(weights_backup, bool):
+ weights_backup = self.weight
+ else:
+ self.weight = None
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- new_weight = updown.to(devices.device, non_blocking=True) + weights_backup.to(devices.device, non_blocking=True)
+ if apply:
+ new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
+ else:
+ new_weight = weights_backup.to(devices.device, non_blocking=True) - updown.to(devices.device, non_blocking=True)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -418,9 +431,15 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
if bias_backup is not None:
- self.bias = None
+ if isinstance(bias_backup, bool):
+ bias_backup = self.bias
+ else:
+ self.bias = None
if ex_bias is not None:
- new_weight = ex_bias.to(devices.device, non_blocking=True) + bias_backup.to(devices.device, non_blocking=True)
+ if apply:
+ new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
+ else:
+ new_weight = bias_backup.to(devices.device, non_blocking=True) - ex_bias.to(devices.device, non_blocking=True)
self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
@@ -432,7 +451,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
return self.weight.device, self.weight.dtype
-def network_process():
+def network_deactivate():
+ pass
+
+def network_activate():
timer['backup'] = 0
timer['calc'] = 0
timer['apply'] = 0
@@ -462,24 +484,25 @@ def network_process():
network_layer_name = getattr(module, 'network_layer_name', None)
current_names = getattr(module, "network_current_names", ())
if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ if task is not None:
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
continue
weight = getattr(module, 'weight', None)
weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- del weight
weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias)
weights_devices.append(weights_device)
weights_dtypes.append(weights_dtype)
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
+ del weight, batch_updown, batch_ex_bias
module.network_current_names = wanted_names
if task is not None:
pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
- if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- # pbar.remove_task(task)
- weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403
+ weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
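A simplified sketch of the backup-then-apply flow implemented above: with lora_fuse_diffusers enabled the backup is only a boolean marker and the LoRA delta is fused into the live weight (lower memory, undo requires a model reload); otherwise a CPU clone of the original weight is kept so the delta can be removed later. Shapes and names below are illustrative, not the module's actual API:

import torch

def apply_lora_delta(module: torch.nn.Linear, updown: torch.Tensor, fuse: bool):
    backup = getattr(module, 'network_weights_backup', None)
    if backup is None:
        # fuse: remember only that a LoRA was applied; otherwise keep a CPU copy for undo
        module.network_weights_backup = True if fuse else module.weight.detach().clone().cpu()
    base = module.weight if fuse else module.network_weights_backup.to(module.weight.device)
    module.weight = torch.nn.Parameter(base + updown, requires_grad=False)

layer = torch.nn.Linear(4, 4)
delta = 0.01 * torch.randn(4, 4)
apply_lora_delta(layer, delta, fuse=False)  # restorable: original weight kept on CPU
apply_lora_delta(layer, delta, fuse=True)   # fused: applied in place on the current weight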
diff --git a/modules/shared.py b/modules/shared.py
index 387b6ca40..10167c809 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -20,7 +20,7 @@
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
-from modules.memstats import memory_stats, ram_stats
+from modules.memstats import memory_stats
from modules.ui_components import DropdownEditable
import modules.interrogate
import modules.memmon
@@ -903,16 +903,16 @@ def get_default_modes():
"wildcards_enabled": OptionInfo(True, "Enable file wildcards support"),
"extra_networks_lora_sep": OptionInfo("LoRA
", "", gr.HTML),
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
- "lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"]}),
+ "lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
"lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info"),
+ "lora_fuse_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA fuse directly to model"),
+ "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "LoRA load directly to GPU"),
+ "lora_offload_backup": OptionInfo(True, "LoRA offload backup weights"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA force loading of all models using Diffusers"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA force loading of specific models using Diffusers"),
- "lora_fuse_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA use fuse when possible"),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
- "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "Load LoRA directly to GPU"),
- "lora_offload_backup": OptionInfo(True, "Offload LoRA Backup Weights"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 0395ce736..bb067ea21 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -12,7 +12,7 @@
from scripts.xyz_grid_shared import str_permutations, list_to_csv_string, re_range # pylint: disable=no-name-in-module
from scripts.xyz_grid_classes import axis_options, AxisOption, SharedSettingsStackHelper # pylint: disable=no-name-in-module
from scripts.xyz_grid_draw import draw_xyz_grid # pylint: disable=no-name-in-module
-from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
+from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing # pylint: disable=no-name-in-module, unused-import
from modules import shared, errors, scripts, images, processing
from modules.ui_components import ToolButton
import modules.ui_symbols as symbols
From 4eac263055b280e948d1a22d8987353efe99d446 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 11:21:39 -0500
Subject: [PATCH 073/162] add bdia sampler
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 6 +-
modules/lora/networks.py | 12 +-
modules/schedulers/scheduler_bdia.py | 551 +++++++++++++++++++++++++++
modules/sd_samplers_diffusers.py | 3 +
modules/shared.py | 2 +-
5 files changed, 563 insertions(+), 11 deletions(-)
create mode 100644 modules/schedulers/scheduler_bdia.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9109341d..57b08fbe7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,9 +32,12 @@
### UI and workflow improvements
-- **LoRA** handler rewrite
+- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
this adds perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
+  - *note*: LoRA weight backups are required so LoRA can be unapplied, but they can take quite a lot of system memory
+    if you know you will not need to unapply LoRA, you can disable backups in *settings -> networks -> lora fuse*
+    in which case, you need to reload the model to unapply LoRA
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
@@ -60,6 +63,7 @@
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- DPM FlowMatch: update all and add sigma methods
+ - BDIA-DDIM: *experimental*
### Fixes
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 48073774c..14fce760a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -403,7 +403,7 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias, apply: bool = True):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -417,10 +417,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- if apply:
- new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
- else:
- new_weight = weights_backup.to(devices.device, non_blocking=True) - updown.to(devices.device, non_blocking=True)
+ new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -436,10 +433,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
self.bias = None
if ex_bias is not None:
- if apply:
- new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
- else:
- new_weight = bias_backup.to(devices.device, non_blocking=True) - ex_bias.to(devices.device, non_blocking=True)
+ new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
diff --git a/modules/schedulers/scheduler_bdia.py b/modules/schedulers/scheduler_bdia.py
new file mode 100644
index 000000000..bb3e7f9b2
--- /dev/null
+++ b/modules/schedulers/scheduler_bdia.py
@@ -0,0 +1,551 @@
+# Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion
+# and https://github.com/hojonathanho/diffusion
+
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.utils import BaseOutput
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+@dataclass
+# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
+class DDIMSchedulerOutput(BaseOutput):
+ """
+ Output class for the scheduler's `step` function output.
+
+ Args:
+ prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+ denoising loop.
+ pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+ `pred_original_sample` can be used to preview progress or for guidance.
+ """
+
+ prev_sample: torch.Tensor
+ pred_original_sample: Optional[torch.Tensor] = None
+
+
+# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
+def betas_for_alpha_bar(
+ num_diffusion_timesteps,
+ max_beta=0.999,
+ alpha_transform_type="cosine",
+):
+ """
+ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
+ (1-beta) over time from t = [0,1].
+
+ Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+ to that part of the diffusion process.
+
+
+ Args:
+ num_diffusion_timesteps (`int`): the number of betas to produce.
+ max_beta (`float`): the maximum beta to use; use values lower than 1 to
+ prevent singularities.
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+ Choose from `cosine` or `exp`
+
+ Returns:
+ betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+ """
+ if alpha_transform_type == "cosine":
+
+ def alpha_bar_fn(t):
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+
+ elif alpha_transform_type == "exp":
+
+ def alpha_bar_fn(t):
+ return math.exp(t * -12.0)
+
+ else:
+ raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
+
+ betas = []
+ for i in range(num_diffusion_timesteps):
+ t1 = i / num_diffusion_timesteps
+ t2 = (i + 1) / num_diffusion_timesteps
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
+ return torch.tensor(betas, dtype=torch.float32)
+
+
+def rescale_zero_terminal_snr(betas):
+ """
+ Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+
+
+ Args:
+ betas (`torch.Tensor`):
+ the betas that the scheduler is being initialized with.
+
+ Returns:
+ `torch.Tensor`: rescaled betas with zero terminal SNR
+ """
+ # Convert betas to alphas_bar_sqrt
+ alphas = 1.0 - betas
+ alphas_cumprod = torch.cumprod(alphas, dim=0)
+ alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+ # Store old values.
+ alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+ alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+ # Shift so the last timestep is zero.
+ alphas_bar_sqrt -= alphas_bar_sqrt_T
+
+ # Scale so the first timestep is back to the old value.
+ alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+ # Convert alphas_bar_sqrt to betas
+ alphas_bar = alphas_bar_sqrt**2 # Revert sqrt
+ alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod
+ alphas = torch.cat([alphas_bar[0:1], alphas])
+ betas = 1 - alphas
+
+ return betas
+
+class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
+ """
+ `DDIMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with
+ non-Markovian guidance.
+
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+ methods the library implements for all schedulers such as loading and saving.
+
+ Args:
+ num_train_timesteps (`int`, defaults to 1000):
+ The number of diffusion steps to train the model.
+ beta_start (`float`, defaults to 0.0001):
+ The starting `beta` value of inference.
+ beta_end (`float`, defaults to 0.02):
+ The final `beta` value.
+ beta_schedule (`str`, defaults to `"linear"`):
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+ `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
+ trained_betas (`np.ndarray`, *optional*):
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+ clip_sample (`bool`, defaults to `True`):
+ Clip the predicted sample for numerical stability.
+ clip_sample_range (`float`, defaults to 1.0):
+ The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
+ set_alpha_to_one (`bool`, defaults to `True`):
+ Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
+ there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
+ otherwise it uses the alpha value at step 0.
+ steps_offset (`int`, defaults to 0):
+ An offset added to the inference steps, as required by some model families.
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+ `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+ Video](https://imagen.research.google/video/paper.pdf) paper).
+ thresholding (`bool`, defaults to `False`):
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
+ as Stable Diffusion.
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
+ sample_max_value (`float`, defaults to 1.0):
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
+ timestep_spacing (`str`, defaults to `"leading"`):
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+ rescale_betas_zero_snr (`bool`, defaults to `False`):
+ Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+ dark samples instead of limiting it to samples with medium brightness. Loosely related to
+ [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
+ """
+
+ _compatibles = [e.name for e in KarrasDiffusionSchedulers]
+ order = 1
+
+ @register_to_config
+ def __init__(
+ self,
+ num_train_timesteps: int = 1000,
+ beta_start: float = 0.0001,
+ beta_end: float = 0.02,
+ beta_schedule: str = "linear",
+ trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
+ clip_sample: bool = True,
+ set_alpha_to_one: bool = True, #was True
+ steps_offset: int = 0,
+ prediction_type: str = "epsilon",
+ thresholding: bool = False,
+ dynamic_thresholding_ratio: float = 0.995,
+ clip_sample_range: float = 1.0,
+ sample_max_value: float = 1.0,
+ timestep_spacing: str = "leading", #leading
+ rescale_betas_zero_snr: bool = False,
+ gamma: float = 1.0,
+
+ ):
+ if trained_betas is not None:
+ self.betas = torch.tensor(trained_betas, dtype=torch.float32)
+ elif beta_schedule == "linear":
+ self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
+ elif beta_schedule == "scaled_linear":
+ # this schedule is very specific to the latent diffusion model.
+ self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+ elif beta_schedule == "squaredcos_cap_v2":
+ # Glide cosine schedule
+ self.betas = betas_for_alpha_bar(num_train_timesteps)
+ else:
+ raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
+
+ # Rescale for zero SNR
+ if rescale_betas_zero_snr:
+ self.betas = rescale_zero_terminal_snr(self.betas)
+
+ self.alphas = 1.0 - self.betas #may have to add something for last step
+
+ self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+ # At every step in ddim, we are looking into the previous alphas_cumprod
+ # For the final step, there is no previous alphas_cumprod because we are already at 0
+ # `set_alpha_to_one` decides whether we set this parameter simply to one or
+ # whether we use the final alpha of the "non-previous" one.
+ self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
+
+ # standard deviation of the initial noise distribution
+ self.init_noise_sigma = 1.0
+
+ # setable values
+ self.num_inference_steps = None
+ self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
+ self.next_sample = []
+ self.BDIA = False
+
+
+ def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
+ """
+ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
+ current timestep.
+
+ Args:
+ sample (`torch.Tensor`):
+ The input sample.
+ timestep (`int`, *optional*):
+ The current timestep in the diffusion chain.
+
+ Returns:
+ `torch.Tensor`:
+ A scaled input sample.
+ """
+ return sample
+
+ def _get_variance(self, timestep, prev_timestep):
+ alpha_prod_t = self.alphas_cumprod[timestep]
+ alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+ beta_prod_t = 1 - alpha_prod_t
+ beta_prod_t_prev = 1 - alpha_prod_t_prev
+
+ variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
+
+ return variance
+
+ # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
+ def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
+ """
+ "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
+ prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
+ s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
+ pixels from saturation at each step. We find that dynamic thresholding results in significantly better
+ photorealism as well as better image-text alignment, especially when using very large guidance weights."
+
+ https://arxiv.org/abs/2205.11487
+ """
+ dtype = sample.dtype
+ batch_size, channels, *remaining_dims = sample.shape
+
+ if dtype not in (torch.float32, torch.float64):
+ sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
+
+ # Flatten sample for doing quantile calculation along each image
+ sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
+
+ abs_sample = sample.abs() # "a certain percentile absolute pixel value"
+
+ s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
+ s = torch.clamp(
+ s, min=1, max=self.config.sample_max_value
+ ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
+ s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
+ sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
+
+ sample = sample.reshape(batch_size, channels, *remaining_dims)
+ sample = sample.to(dtype)
+
+ return sample
+
+ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
+ """
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
+
+ Args:
+ num_inference_steps (`int`):
+ The number of diffusion steps used when generating samples with a pre-trained model.
+ """
+
+ if num_inference_steps > self.config.num_train_timesteps:
+ raise ValueError(
+ f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
+ f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+ f" maximal {self.config.num_train_timesteps} timesteps."
+ )
+
+ self.num_inference_steps = num_inference_steps
+
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
+ if self.config.timestep_spacing == "linspace":
+ timesteps = (
+ np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
+ .round()[::-1]
+ .copy()
+ .astype(np.int64)
+ )
+ elif self.config.timestep_spacing == "leading":
+ step_ratio = self.config.num_train_timesteps // self.num_inference_steps
+ # creates integer timesteps by multiplying by ratio
+ # casting to int to avoid issues when num_inference_step is power of 3
+ timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
+ timesteps += self.config.steps_offset
+ elif self.config.timestep_spacing == "trailing":
+ step_ratio = self.config.num_train_timesteps / self.num_inference_steps
+ # creates integer timesteps by multiplying by ratio
+ # casting to int to avoid issues when num_inference_step is power of 3
+ timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64)
+ timesteps -= 1
+ else:
+ raise ValueError(
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'."
+ )
+
+ self.timesteps = torch.from_numpy(timesteps).to(device)
+
+ def step(
+ self,
+ model_output: torch.Tensor,
+ timestep: int,
+ sample: torch.Tensor,
+ eta: float = 0.0,
+ use_clipped_model_output: bool = False,
+ generator=None,
+ variance_noise: Optional[torch.Tensor] = None,
+ return_dict: bool = True,
+ debug: bool = False,
+ ) -> Union[DDIMSchedulerOutput, Tuple]:
+ """
+ Predict the sample from the previous timestep by reversing the SDE.
+
+ Args:
+ model_output (torch.Tensor): Direct output from learned diffusion model
+ timestep (int): Current discrete timestep in the diffusion chain
+ sample (torch.Tensor): Current instance of sample created by diffusion process
+ eta (float): Weight of noise for added noise in diffusion step
+ use_clipped_model_output (bool): Whether to use clipped model output
+ generator (torch.Generator, optional): Random number generator
+ variance_noise (torch.Tensor, optional): Pre-generated noise for variance
+ return_dict (bool): Whether to return as DDIMSchedulerOutput or tuple
+ debug (bool): Whether to print debug information
+ """
+ if self.num_inference_steps is None:
+ raise ValueError("Number of inference steps is 'None', run 'set_timesteps' first")
+
+ # Calculate timesteps
+ step_size = self.config.num_train_timesteps // self.num_inference_steps
+ prev_timestep = timestep - step_size
+ next_timestep = timestep + step_size
+
+ if debug:
+ print("\n=== Timestep Information ===")
+ print(f"Current timestep: {timestep}")
+ print(f"Previous timestep: {prev_timestep}")
+ print(f"Next timestep: {next_timestep}")
+ print(f"Step size: {step_size}")
+
+ # Pre-compute alpha and variance values
+ alpha_prod_t = self.alphas_cumprod[timestep]
+ alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+ variance = self._get_variance(timestep, prev_timestep)
+ std_dev_t = eta * variance ** 0.5
+
+ # Compute required values
+ alpha_i = alpha_prod_t ** 0.5
+ alpha_i_minus_1 = alpha_prod_t_prev ** 0.5
+ sigma_i = (1 - alpha_prod_t) ** 0.5
+ sigma_i_minus_1 = (1 - alpha_prod_t_prev - std_dev_t**2) ** 0.5
+
+ if debug:
+ print("\n=== Alpha Values ===")
+ print(f"alpha_i: {alpha_i}")
+ print(f"alpha_i_minus_1: {alpha_i_minus_1}")
+ print(f"sigma_i: {sigma_i}")
+ print(f"sigma_i_minus_1: {sigma_i_minus_1}")
+
+ # Predict original sample based on prediction type
+ if self.config.prediction_type == "epsilon":
+ pred_original_sample = (sample - sigma_i * model_output) / alpha_i
+ pred_epsilon = model_output
+ if debug:
+ print("\nPrediction type: epsilon")
+ elif self.config.prediction_type == "sample":
+ pred_original_sample = model_output
+ pred_epsilon = (sample - alpha_i * pred_original_sample) / sigma_i
+ if debug:
+ print("\nPrediction type: sample")
+ elif self.config.prediction_type == "v_prediction":
+ pred_original_sample = alpha_i * sample - sigma_i * model_output
+ pred_epsilon = alpha_i * model_output + sigma_i * sample
+ if debug:
+ print("\nPrediction type: v_prediction")
+ else:
+ raise ValueError(
+ f"prediction_type {self.config.prediction_type} must be one of `epsilon`, `sample`, or `v_prediction`"
+ )
+
+ # Apply thresholding or clipping if configured
+ if self.config.thresholding:
+ if debug:
+ print("\nApplying thresholding")
+ pred_original_sample = self._threshold_sample(pred_original_sample)
+ elif self.config.clip_sample:
+ if debug:
+ print("\nApplying clipping")
+ pred_original_sample = pred_original_sample.clamp(
+ -self.config.clip_sample_range, self.config.clip_sample_range
+ )
+
+ # Recompute pred_epsilon if using clipped model output
+ if use_clipped_model_output:
+ if debug:
+ print("\nUsing clipped model output")
+ pred_epsilon = (sample - alpha_i * pred_original_sample) / sigma_i
+
+ # Compute DDIM step
+ ddim_step = alpha_i_minus_1 * pred_original_sample + sigma_i_minus_1 * pred_epsilon
+
+ # Handle initial DDIM step or BDIA steps
+ if len(self.next_sample) == 0:
+ if debug:
+ print("\nFirst iteration (DDIM)")
+ self.update_next_sample_BDIA(sample)
+ self.update_next_sample_BDIA(ddim_step)
+ else:
+ if debug:
+ print("\nBDIA step")
+ # BDIA implementation
+ alpha_prod_t_next = self.alphas_cumprod[next_timestep]
+ alpha_i_plus_1 = alpha_prod_t_next ** 0.5
+ sigma_i_plus_1 = (1 - alpha_prod_t_next) ** 0.5
+
+ if debug:
+ print(f"alpha_i_plus_1: {alpha_i_plus_1}")
+ print(f"sigma_i_plus_1: {sigma_i_plus_1}")
+
+ a = alpha_i_plus_1 * pred_original_sample + sigma_i_plus_1 * pred_epsilon
+ bdia_step = (
+ self.config.gamma * self.next_sample[-2] +
+ ddim_step -
+ (self.config.gamma * a)
+ )
+ self.update_next_sample_BDIA(bdia_step)
+
+ prev_sample = self.next_sample[-1]
+
+ # Apply variance noise if eta > 0
+ if eta > 0:
+ if debug:
+ print(f"\nApplying variance noise with eta: {eta}")
+
+ if variance_noise is not None and generator is not None:
+ raise ValueError(
+ "Cannot pass both generator and variance_noise. Use either `generator` or `variance_noise`."
+ )
+
+ if variance_noise is None:
+ variance_noise = randn_tensor(
+ model_output.shape,
+ generator=generator,
+ device=model_output.device,
+ dtype=model_output.dtype
+ )
+ prev_sample = prev_sample + std_dev_t * variance_noise
+
+ if not return_dict:
+ return (prev_sample,)
+
+ return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
+
+ def add_noise(
+ self,
+ original_samples: torch.Tensor,
+ noise: torch.Tensor,
+ timesteps: torch.IntTensor,
+ ) -> torch.Tensor:
+ # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
+ # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
+ # for the subsequent add_noise calls
+ self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
+ alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
+ timesteps = timesteps.to(original_samples.device)
+
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+ sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+ while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
+ sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+ while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+ noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+ return noisy_samples
+
+ # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
+ def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+ # Make sure alphas_cumprod and timestep have same device and dtype as sample
+ self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
+ alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
+ timesteps = timesteps.to(sample.device)
+
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+ sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+ while len(sqrt_alpha_prod.shape) < len(sample.shape):
+ sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+ while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape):
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+ velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
+ return velocity
+
+ def update_next_sample_BDIA(self, new_value):
+ self.next_sample.append(new_value.clone())
+
+
+ def __len__(self):
+ return self.config.num_train_timesteps
\ No newline at end of file
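As a hedged usage sketch, the new scheduler can be attached to a diffusers pipeline like any other DDIM-style scheduler; inside SD.Next the wiring happens in sd_samplers_diffusers.py below, so the model id and explicit gamma value here are purely illustrative (gamma=0 reduces the BDIA step to plain DDIM):

from diffusers import StableDiffusionPipeline
from modules.schedulers.scheduler_bdia import BDIA_DDIMScheduler

pipe = StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5')
pipe.scheduler = BDIA_DDIMScheduler.from_config(pipe.scheduler.config, gamma=1.0)
image = pipe('a mountain lake at sunset', num_inference_steps=20).images[0]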
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 4672df92e..7c23d4342 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -52,6 +52,7 @@
from modules.schedulers.scheduler_dc import DCSolverMultistepScheduler # pylint: disable=ungrouped-imports
from modules.schedulers.scheduler_vdm import VDMScheduler # pylint: disable=ungrouped-imports
from modules.schedulers.scheduler_dpm_flowmatch import FlowMatchDPMSolverMultistepScheduler # pylint: disable=ungrouped-imports
+ from modules.schedulers.scheduler_bdia import BDIA_DDIMScheduler # pylint: disable=ungrouped-imports
except Exception as e:
shared.log.error(f'Diffusers import error: version={diffusers.__version__} error: {e}')
if os.environ.get('SD_SAMPLER_DEBUG', None) is not None:
@@ -97,6 +98,7 @@
'VDM Solver': { 'clip_sample_range': 2.0, },
'LCM': { 'beta_start': 0.00085, 'beta_end': 0.012, 'beta_schedule': "scaled_linear", 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'thresholding': False, 'timestep_spacing': 'linspace' },
'TCD': { 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'beta_schedule': 'scaled_linear' },
+ 'BDIA DDIM': { 'clip_sample': False, 'set_alpha_to_one': True, 'steps_offset': 0, 'clip_sample_range': 1.0, 'sample_max_value': 1.0, 'timestep_spacing': 'leading', 'rescale_betas_zero_snr': False, 'thresholding': False, 'gamma': 1.0 },
'PNDM': { 'skip_prk_steps': False, 'set_alpha_to_one': False, 'steps_offset': 0, 'timestep_spacing': 'linspace' },
'IPNDM': { },
@@ -142,6 +144,7 @@
sd_samplers_common.SamplerData('SA Solver', lambda model: DiffusionSampler('SA Solver', SASolverScheduler, model), [], {}),
sd_samplers_common.SamplerData('DC Solver', lambda model: DiffusionSampler('DC Solver', DCSolverMultistepScheduler, model), [], {}),
sd_samplers_common.SamplerData('VDM Solver', lambda model: DiffusionSampler('VDM Solver', VDMScheduler, model), [], {}),
+ sd_samplers_common.SamplerData('BDIA DDIM', lambda model: DiffusionSampler('BDIA DDIM g=0', BDIA_DDIMScheduler, model), [], {}),
sd_samplers_common.SamplerData('PNDM', lambda model: DiffusionSampler('PNDM', PNDMScheduler, model), [], {}),
sd_samplers_common.SamplerData('IPNDM', lambda model: DiffusionSampler('IPNDM', IPNDMScheduler, model), [], {}),
diff --git a/modules/shared.py b/modules/shared.py
index 10167c809..269510b08 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -20,7 +20,7 @@
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
-from modules.memstats import memory_stats
+from modules.memstats import memory_stats, ram_stats # pylint: disable=unused-import
from modules.ui_components import DropdownEditable
import modules.interrogate
import modules.memmon
From 9acfb4431d8cacceed8425eb9e68e737f7e8913f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 11:30:30 -0500
Subject: [PATCH 074/162] update light theme
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
javascript/light-teal.css | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index f083ce41a..3008cee4b 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
+Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
diff --git a/javascript/light-teal.css b/javascript/light-teal.css
index 28bf03e6f..174622e52 100644
--- a/javascript/light-teal.css
+++ b/javascript/light-teal.css
@@ -20,9 +20,9 @@
--body-text-color: var(--neutral-800);
--body-text-color-subdued: var(--neutral-600);
--background-color: #FFFFFF;
- --background-fill-primary: var(--neutral-400);
+ --background-fill-primary: var(--neutral-300);
--input-padding: 4px;
- --input-background-fill: var(--neutral-300);
+ --input-background-fill: var(--neutral-200);
--input-shadow: 2px 2px 2px 2px var(--neutral-500);
--button-secondary-text-color: black;
--button-secondary-background-fill: linear-gradient(to bottom right, var(--neutral-200), var(--neutral-500));
@@ -291,8 +291,8 @@ svg.feather.feather-image, .feather .feather-image { display: none }
--slider-color: ;
--stat-background-fill: linear-gradient(to right, var(--primary-400), var(--primary-600));
--table-border-color: var(--neutral-700);
- --table-even-background-fill: #222222;
- --table-odd-background-fill: #333333;
+ --table-even-background-fill: #FFFFFF;
+ --table-odd-background-fill: #CCCCCC;
--table-radius: var(--radius-lg);
--table-row-focus: var(--color-accent-soft);
}
From 52ea1813898f0a809e16600ae5980bae8f0887c2 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 12:26:27 -0500
Subject: [PATCH 075/162] update requirements and notes
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 9 +++++++--
TODO.md | 3 ++-
requirements.txt | 2 +-
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8091176e8..748ac6f04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,13 +29,13 @@
style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without an input image, in which case the first prompt is used to establish the baseline
*note:* all prompts are processed as a single batch, so vram is the limiting factor
-- **OpenVINO**: update to 2024.5.0
### UI and workflow improvements
- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
this adds perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
+ thanks @AI-Casanova
- *note*: LoRA weight backups are required so LoRA can be unapplied, but they can take quite a lot of system memory
  if you know you will not need to unapply LoRA, you can disable backups in *settings -> networks -> lora fuse*
  in which case, you need to reload the model to unapply LoRA
@@ -51,6 +51,7 @@
- faster and more compatible *balanced* mode
- balanced offload: units are now in percentage instead of bytes
- balanced offload: add both high and low watermark
+    *note*: balanced offload is the recommended offload method when using any large model such as sd35 or flux
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
@@ -60,7 +61,11 @@
- control: option to hide input column
- control: add stats
- browser -> server logging framework
-  - add additional themes: `black-reimagined`
+  - add additional themes: `black-reimagined`, thanks @Artheriax
+
+### Updates
+
+- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- DPM FlowMatch: update all and add sigma methods
diff --git a/TODO.md b/TODO.md
index 73008039d..90372e41f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -9,7 +9,8 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- Flux IPAdapter:
- Flux NF4:
- SANA:
-- LTX-Video:
+- LTX-Video:
+- TorchAO:
## Other
diff --git a/requirements.txt b/requirements.txt
index 12a9f85cb..3b1b14c7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,7 +52,7 @@ numba==0.59.1
protobuf==4.25.3
pytorch_lightning==1.9.4
tokenizers==0.20.3
-transformers==4.46.2
+transformers==4.46.3
urllib3==1.26.19
Pillow==10.4.0
timm==0.9.16
From bd192d2991dd8895117e87ed892bc058f84b4cf1 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 16:27:22 -0500
Subject: [PATCH 076/162] remove tracemalloc
Signed-off-by: Vladimir Mandic
---
launch.py | 3 ---
modules/cmd_args.py | 1 -
modules/processing.py | 8 --------
3 files changed, 12 deletions(-)
diff --git a/launch.py b/launch.py
index 5c8a6051a..e00da58c7 100755
--- a/launch.py
+++ b/launch.py
@@ -192,9 +192,6 @@ def main():
global args # pylint: disable=global-statement
installer.ensure_base_requirements()
init_args() # setup argparser and default folders
- if args.malloc:
- import tracemalloc
- tracemalloc.start()
installer.args = args
installer.setup_logging()
installer.log.info('Starting SD.Next')
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index cb4e5fc16..752ad02c0 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -26,7 +26,6 @@ def main_args():
group_diag.add_argument("--no-hashing", default=os.environ.get("SD_NOHASHING", False), action='store_true', help="Disable hashing of checkpoints, default: %(default)s")
group_diag.add_argument("--no-metadata", default=os.environ.get("SD_NOMETADATA", False), action='store_true', help="Disable reading of metadata from models, default: %(default)s")
group_diag.add_argument("--profile", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Run profiler, default: %(default)s")
- group_diag.add_argument("--malloc", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Trace memory ops, default: %(default)s")
group_diag.add_argument("--disable-queue", default=os.environ.get("SD_DISABLEQUEUE", False), action='store_true', help="Disable queues, default: %(default)s")
group_diag.add_argument('--debug', default=os.environ.get("SD_DEBUG", False), action='store_true', help = "Run installer with debug logging, default: %(default)s")
diff --git a/modules/processing.py b/modules/processing.py
index 095eba54c..57512850a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -475,13 +475,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if not p.disable_extra_networks:
shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
- if shared.cmd_opts.malloc:
- import tracemalloc
- snapshot = tracemalloc.take_snapshot()
- stats = snapshot.statistics('lineno')
- shared.log.debug('Profile malloc:')
- for stat in stats[:20]:
- frame = stat.traceback[0]
- shared.log.debug(f' file="{frame.filename}":{frame.lineno} size={stat.size}')
devices.torch_gc(force=True)
return processed
From e3f06734d04bec232ddf234e9fa550375fde9638 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Dec 2024 08:52:53 -0500
Subject: [PATCH 077/162] samplers add custom sigma
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
installer.py | 2 +-
modules/processing_args.py | 15 +++++++++++++++
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 748ac6f04..51d61aa46 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -68,6 +68,7 @@
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
+ - Euler FlowMatch: allow using timestep presets to set sigmas
- DPM FlowMatch: update all and add sigma methods
- BDIA-DDIM: *experimental*
diff --git a/installer.py b/installer.py
index c849ac5b6..e03512301 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = 'c96bfa5c80eca798d555a79a491043c311d0f608'
+ sha = '63b631f38336f56755fb5cf15d9b0fb70bbf6323' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/processing_args.py b/modules/processing_args.py
index 4ce552825..d73762d29 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -186,6 +186,21 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
shared.log.error(f'Sampler timesteps: {e}')
else:
shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} timesteps not supported')
+ if 'sigmas' in possible:
+ sigmas = re.split(',| ', shared.opts.schedulers_timesteps)
+ sigmas = [float(x)/1000.0 for x in sigmas if x.isdigit()]
+ if len(sigmas) > 0:
+ if hasattr(model.scheduler, 'set_timesteps') and "sigmas" in set(inspect.signature(model.scheduler.set_timesteps).parameters.keys()):
+ try:
+ args['sigmas'] = sigmas
+ p.steps = len(sigmas)
+ p.timesteps = sigmas
+ steps = p.steps
+ shared.log.debug(f'Sampler: steps={len(sigmas)} sigmas={sigmas}')
+ except Exception as e:
+ shared.log.error(f'Sampler sigmas: {e}')
+ else:
+ shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} sigmas not supported')
if hasattr(model, 'scheduler') and hasattr(model.scheduler, 'noise_sampler_seed') and hasattr(model.scheduler, 'noise_sampler'):
model.scheduler.noise_sampler = None # noise needs to be reset instead of using cached values
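The new `sigmas` branch mirrors the existing timesteps handling: the comma/space-separated preset string is split, scaled from the 0-1000 timestep range down to 0-1 sigmas, and only forwarded when the scheduler's `set_timesteps` signature accepts a `sigmas` argument. A rough standalone sketch of that parsing and signature check (the `DummyScheduler` below is a stand-in to illustrate the check, not an SD.Next or diffusers class):

```python
# Sketch of the sigma-preset parsing added above.
import re
import inspect

def parse_sigmas(preset: str) -> list:
    parts = re.split(',| ', preset)
    return [float(x) / 1000.0 for x in parts if x.isdigit()]  # timesteps (0-1000) -> sigmas (0-1)

class DummyScheduler:
    def set_timesteps(self, num_inference_steps=None, sigmas=None, device=None):
        self.sigmas = sigmas

scheduler = DummyScheduler()
sigmas = parse_sigmas('1000, 750, 500, 250')
if sigmas and 'sigmas' in inspect.signature(scheduler.set_timesteps).parameters:
    scheduler.set_timesteps(sigmas=sigmas)
    print(f'steps={len(sigmas)} sigmas={sigmas}')  # step count follows the preset length
```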
From 1edf657247a84922405494365137772069f125e0 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Dec 2024 09:46:23 -0500
Subject: [PATCH 078/162] add nvml charts
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sd-extension-system-info | 2 +-
javascript/nvml.js | 32 +++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/extensions-builtin/sd-extension-system-info b/extensions-builtin/sd-extension-system-info
index 6a2a28a4f..dfa01ce99 160000
--- a/extensions-builtin/sd-extension-system-info
+++ b/extensions-builtin/sd-extension-system-info
@@ -1 +1 @@
-Subproject commit 6a2a28a4f674b85e09824384ad842b801426b491
+Subproject commit dfa01ce99a17d76b45284ef28cef018ff52ac353
diff --git a/javascript/nvml.js b/javascript/nvml.js
index cf0187367..39850c9d8 100644
--- a/javascript/nvml.js
+++ b/javascript/nvml.js
@@ -1,6 +1,32 @@
let nvmlInterval = null; // eslint-disable-line prefer-const
let nvmlEl = null;
let nvmlTable = null;
+const chartData = { mem: [], load: [] };
+
+async function updateNVMLChart(mem, load) {
+ const maxLen = 120;
+ const colorRangeMap = $.range_map({
+ '0:5': '#fffafa',
+ '6:10': '#fff7ed',
+ '11:20': '#fed7aa',
+ '21:30': '#fdba74',
+ '31:40': '#fb923c',
+ '41:50': '#f97316',
+ '51:60': '#ea580c',
+ '61:70': '#c2410c',
+ '71:80': '#9a3412',
+ '81:90': '#7c2d12',
+ '91:100': '#6c2e12',
+ });
+ const sparklineConfigLOAD = { type: 'bar', height: '100px', barWidth: '2px', barSpacing: '1px', chartRangeMin: 0, chartRangeMax: 100, barColor: '#89007D' };
+ const sparklineConfigMEM = { type: 'bar', height: '100px', barWidth: '2px', barSpacing: '1px', chartRangeMin: 0, chartRangeMax: 100, colorMap: colorRangeMap, composite: true };
+ if (chartData.load.length > maxLen) chartData.load.shift();
+ chartData.load.push(load);
+ if (chartData.mem.length > maxLen) chartData.mem.shift();
+ chartData.mem.push(mem);
+ $('#nvmlChart').sparkline(chartData.load, sparklineConfigLOAD);
+ $('#nvmlChart').sparkline(chartData.mem, sparklineConfigMEM);
+}
async function updateNVML() {
try {
@@ -35,6 +61,9 @@ async function updateNVML() {
State | ${gpu.state} |
`;
nvmlTbody.innerHTML = rows;
+ const mem = 100 * (gpu.memory?.used || 0) / (gpu.memory?.total || 1);
+ const load = 100 * (gpu.clock?.gpu?.[0] || 0) / (gpu.clock?.gpu?.[1] || 1);
+ updateNVMLChart(mem, load);
}
nvmlEl.style.display = 'block';
} catch (e) {
@@ -56,7 +85,10 @@ async function initNVML() {
| |
`;
+ const nvmlChart = document.createElement('div');
+ nvmlChart.id = 'nvmlChart';
nvmlEl.appendChild(nvmlTable);
+ nvmlEl.appendChild(nvmlChart);
gradioApp().appendChild(nvmlEl);
log('initNVML');
}
From cd44f4ebf393e87420edefcdff3a046e9d5139eb Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Dec 2024 09:52:15 -0500
Subject: [PATCH 079/162] cleanup nvml
Signed-off-by: Vladimir Mandic
---
javascript/nvml.js | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/javascript/nvml.js b/javascript/nvml.js
index 39850c9d8..0a82cba1b 100644
--- a/javascript/nvml.js
+++ b/javascript/nvml.js
@@ -5,7 +5,7 @@ const chartData = { mem: [], load: [] };
async function updateNVMLChart(mem, load) {
const maxLen = 120;
- const colorRangeMap = $.range_map({
+ const colorRangeMap = $.range_map({ // eslint-disable-line no-undef
'0:5': '#fffafa',
'6:10': '#fff7ed',
'11:20': '#fed7aa',
@@ -24,8 +24,8 @@ async function updateNVMLChart(mem, load) {
chartData.load.push(load);
if (chartData.mem.length > maxLen) chartData.mem.shift();
chartData.mem.push(mem);
- $('#nvmlChart').sparkline(chartData.load, sparklineConfigLOAD);
- $('#nvmlChart').sparkline(chartData.mem, sparklineConfigMEM);
+ $('#nvmlChart').sparkline(chartData.load, sparklineConfigLOAD); // eslint-disable-line no-undef
+ $('#nvmlChart').sparkline(chartData.mem, sparklineConfigMEM); // eslint-disable-line no-undef
}
async function updateNVML() {
@@ -61,9 +61,7 @@ async function updateNVML() {
State | ${gpu.state} |
`;
nvmlTbody.innerHTML = rows;
- const mem = 100 * (gpu.memory?.used || 0) / (gpu.memory?.total || 1);
- const load = 100 * (gpu.clock?.gpu?.[0] || 0) / (gpu.clock?.gpu?.[1] || 1);
- updateNVMLChart(mem, load);
+ updateNVMLChart(gpu.load.memory, gpu.load.gpu);
}
nvmlEl.style.display = 'block';
} catch (e) {
From e41d9f52cdf0fceeecdb456cd38256dc8e57b3e7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Dec 2024 08:47:22 -0500
Subject: [PATCH 080/162] add offload warning
Signed-off-by: Vladimir Mandic
---
modules/sd_models.py | 17 +++++++++++------
wiki | 2 +-
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 37567962c..101ff837b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -34,6 +34,7 @@
debug_process = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None
diffusers_version = int(diffusers.__version__.split('.')[1])
checkpoint_tiles = checkpoint_titles # legacy compatibility
+should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']
class NoWatermark:
@@ -320,11 +321,15 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
return
if not (hasattr(sd_model, "has_accelerate") and sd_model.has_accelerate):
sd_model.has_accelerate = False
- if hasattr(sd_model, 'maybe_free_model_hooks') and shared.opts.diffusers_offload_mode == "none":
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
- sd_model.maybe_free_model_hooks()
- sd_model.has_accelerate = False
- if hasattr(sd_model, "enable_model_cpu_offload") and shared.opts.diffusers_offload_mode == "model":
+ if shared.opts.diffusers_offload_mode == "none":
+ if shared.sd_model_type in should_offload:
+ shared.log.warning(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} type={shared.sd_model.__class__.__name__} large model')
+ else:
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
+ if hasattr(sd_model, 'maybe_free_model_hooks'):
+ sd_model.maybe_free_model_hooks()
+ sd_model.has_accelerate = False
+ if shared.opts.diffusers_offload_mode == "model" and hasattr(sd_model, "enable_model_cpu_offload"):
try:
shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
@@ -339,7 +344,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
set_accelerate(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
- if hasattr(sd_model, "enable_sequential_cpu_offload") and shared.opts.diffusers_offload_mode == "sequential":
+ if shared.opts.diffusers_offload_mode == "sequential" and hasattr(sd_model, "enable_sequential_cpu_offload"):
try:
shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
diff --git a/wiki b/wiki
index f57cdb49d..2a83f725b 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit f57cdb49d8ca928024b43525897d1c1379eab4c4
+Subproject commit 2a83f725bda6a81399f579ba7102741f71b0be39
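The patch above keeps `offload=none` working but warns when the loaded model belongs to one of the large families listed in `should_offload`, since those are likely to exhaust VRAM without offloading. A simplified sketch of that decision (the `report_offload` helper and plain `logging` logger are illustrative assumptions, not SD.Next code):

```python
# Sketch of the warning logic added to set_diffuser_offload above.
import logging

log = logging.getLogger('sd')
should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']  # large model families

def report_offload(offload_mode: str, model_type: str, op: str = 'model') -> None:
    if offload_mode == 'none' and model_type in should_offload:
        # large models without offload are likely to exhaust VRAM, hence a warning
        log.warning(f'Setting {op}: offload={offload_mode} type={model_type} large model')
    else:
        log.debug(f'Setting {op}: offload={offload_mode}')

report_offload('none', 'f1')  # warns: a FLUX-class model with offload disabled
```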
From ca060026020ccd8c1720fedc25fa2bcea0478d46 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Dec 2024 09:25:57 -0500
Subject: [PATCH 081/162] lora maintain device
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 63 ++++++++++++++++--------------
modules/model_stablecascade.py | 6 +--
modules/processing_diffusers.py | 12 ++----
modules/processing_info.py | 1 +
modules/prompt_parser_diffusers.py | 4 --
modules/sd_models.py | 2 +
6 files changed, 42 insertions(+), 46 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 14fce760a..735c00c4c 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -307,7 +307,7 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
# section: process loaded networks
-def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name, wanted_names):
+def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str, wanted_names: tuple):
global bnb # pylint: disable=W0603
backup_size = 0
if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
@@ -356,7 +356,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
return backup_size
-def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name):
+def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
self.to(devices.device, non_blocking=True)
batch_updown = None
@@ -403,7 +403,7 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -421,10 +421,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight = torch.nn.Parameter(new_weight, requires_grad=False)
+ self.weight = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
del new_weight
else:
- self.weight = torch.nn.Parameter(weights_backup, requires_grad=False)
+ self.weight = torch.nn.Parameter(weights_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
if bias_backup is not None:
@@ -434,10 +434,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
self.bias = None
if ex_bias is not None:
new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
- self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
+ self.bias = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
del new_weight
else:
- self.bias = torch.nn.Parameter(bias_backup, requires_grad=False)
+ self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
else:
self.bias = None
t1 = time.time()
@@ -457,14 +457,15 @@ def network_activate():
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
- modules = []
+ modules = {}
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
if component is not None and hasattr(component, 'named_modules'):
- modules += list(component.named_modules())
+ modules[component_name] = list(component.named_modules())
+ total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
pbar = rp.Progress(rp.TextColumn('[cyan]Apply network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
- task = pbar.add_task(description='' , total=len(modules))
+ task = pbar.add_task(description='' , total=total)
else:
task = None
pbar = nullcontext()
@@ -474,29 +475,31 @@ def network_activate():
backup_size = 0
weights_devices = []
weights_dtypes = []
- for _, module in modules:
- network_layer_name = getattr(module, 'network_layer_name', None)
- current_names = getattr(module, "network_current_names", ())
- if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ for component in modules.keys():
+ orig_device = getattr(sd_model, component, None).device
+ for _, module in modules[component]:
+ network_layer_name = getattr(module, 'network_layer_name', None)
+ current_names = getattr(module, "network_current_names", ())
+ if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ if task is not None:
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ continue
+ weight = getattr(module, 'weight', None)
+ weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
+ backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
+ batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
+ weights_devices.append(weights_device)
+ weights_dtypes.append(weights_dtype)
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
+ del weight, batch_updown, batch_ex_bias
+ module.network_current_names = wanted_names
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
- continue
- weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
- backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
- batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias)
- weights_devices.append(weights_device)
- weights_dtypes.append(weights_dtype)
- if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- del weight, batch_updown, batch_ex_bias
- module.network_current_names = wanted_names
- if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/model_stablecascade.py b/modules/model_stablecascade.py
index d6f9e4266..2a7739e55 100644
--- a/modules/model_stablecascade.py
+++ b/modules/model_stablecascade.py
@@ -187,8 +187,7 @@ def __call__(
callback_on_step_end=None,
callback_on_step_end_tensor_inputs=["latents"],
):
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
# 0. Define commonly used variables
self.guidance_scale = guidance_scale
self.do_classifier_free_guidance = self.guidance_scale > 1
@@ -330,8 +329,7 @@ def __call__(
elif output_type == "pil":
images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work
images = self.numpy_to_pil(images)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
else:
images = latents
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index c605a761c..0341cac4d 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -83,8 +83,7 @@ def process_base(p: processing.StableDiffusionProcessing):
try:
t0 = time.time()
sd_models_compile.check_deepcache(enable=True)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
sd_models.move_model(shared.sd_model, devices.device)
if hasattr(shared.sd_model, 'unet'):
sd_models.move_model(shared.sd_model.unet, devices.device)
@@ -266,8 +265,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
if shared.state.interrupted or shared.state.skipped:
shared.sd_model = orig_pipeline
return output
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if shared.opts.diffusers_move_refiner:
sd_models.move_model(shared.sd_refiner, devices.device)
if hasattr(shared.sd_refiner, 'unet'):
@@ -407,8 +405,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
# sanitize init_images
if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
@@ -463,8 +460,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if p.state == '':
global last_p # pylint: disable=global-statement
diff --git a/modules/processing_info.py b/modules/processing_info.py
index 714ebf35f..e0fca12ae 100644
--- a/modules/processing_info.py
+++ b/modules/processing_info.py
@@ -140,6 +140,7 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
if sd_hijack is not None and hasattr(sd_hijack.model_hijack, 'embedding_db') and len(sd_hijack.model_hijack.embedding_db.embeddings_used) > 0: # this is for original hijaacked models only, diffusers are handled separately
args["Embeddings"] = ', '.join(sd_hijack.model_hijack.embedding_db.embeddings_used)
# samplers
+
if getattr(p, 'sampler_name', None) is not None:
args["Sampler eta delta"] = shared.opts.eta_noise_seed_delta if shared.opts.eta_noise_seed_delta != 0 and sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) else None
args["Sampler eta multiplier"] = p.initial_noise_multiplier if getattr(p, 'initial_noise_multiplier', 1.0) != 1.0 else None
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index d2093351a..06c0b6012 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -39,8 +39,6 @@ def prepare_model(pipe = None):
pipe = pipe.pipe
if not hasattr(pipe, "text_encoder"):
return None
- # if shared.opts.diffusers_offload_mode == "balanced":
- # pipe = sd_models.apply_balanced_offload(pipe)
elif hasattr(pipe, "maybe_free_model_hooks"):
pipe.maybe_free_model_hooks()
devices.torch_gc()
@@ -79,8 +77,6 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.scheduled_encode(pipe, batchidx)
else:
self.encode(pipe, prompt, negative_prompt, batchidx)
- # if shared.opts.diffusers_offload_mode == "balanced":
- # pipe = sd_models.apply_balanced_offload(pipe)
self.checkcache(p)
debug(f"Prompt encode: time={(time.time() - t0):.3f}")
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 101ff837b..63ec6b327 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -420,6 +420,8 @@ def detach_hook(self, module):
def apply_balanced_offload(sd_model):
global offload_hook_instance # pylint: disable=global-statement
+ if shared.opts.diffusers_offload_mode != "balanced":
+ return sd_model
if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
offload_hook_instance = OffloadHook()
t0 = time.time()
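The LoRA changes above record the device each pipeline component originally lives on and hand the patched weights back to that device after computing the update on the compute device, so offloaded components stay offloaded. A toy sketch of that round-trip (plain tensors stand in for module weights; `apply_update` is a hypothetical helper, not the SD.Next function):

```python
# Sketch of the "maintain device" idea: compute on the GPU, park the result
# back on whatever device the component originally lived on.
import torch

compute_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def apply_update(weight: torch.Tensor, updown: torch.Tensor) -> torch.Tensor:
    orig_device = weight.device                       # remember where the component lives
    new_weight = weight.to(compute_device) + updown.to(compute_device)
    return new_weight.to(orig_device)                 # hand the result back on the original device

w = torch.zeros(4, 4)                                 # e.g. a CPU-offloaded layer weight
delta = torch.ones(4, 4)
w = apply_update(w, delta)
print(w.device)  # stays on the original device
```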
From 2965045993acaf376bb9fd14f29c500cbc298c46 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 5 Dec 2024 07:58:52 -0500
Subject: [PATCH 082/162] change offload and upcast defaults
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 12 +++++++++---
configs/flux/vae/config.json | 2 +-
configs/sd15/vae/config.json | 1 +
configs/sd3/vae/config.json | 2 +-
configs/sdxl/vae/config.json | 2 +-
modules/shared.py | 14 +++++++-------
6 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51d61aa46..d49c1c555 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,11 +47,17 @@
- Flux: all-in-one safetensors
example:
- Flux: do not recast quants
-- **Offload** improvements:
- - faster and more compatible *balanced* mode
+- **Memory** improvements:
+ - faster and more compatible *balanced offload* mode
- balanced offload: units are now in percentage instead of bytes
- balanced offload: add both high and low watermark
- *note*: balanced offload is recommended method for offload when using any large models such as sd35 or flux
+ default is 25% for low-watermark (skip offload if memory usage is below 25%) and 70% high-watermark (must offload if memory usage is above 70%)
+ - change-in-behavior:
      `lowvram` triggers *sequential offload*; it is also triggered automatically on systems with <=4GB VRAM
      all other systems use *balanced offload* by default (can be changed in settings)
      previous behavior was to use *model offload* on systems with <=8GB or with `medvram`, and no offload by default otherwise
    - VAE upcast is now disabled by default on all systems
      if you have issues with image decode, you'll need to enable it manually
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
diff --git a/configs/flux/vae/config.json b/configs/flux/vae/config.json
index b43183d0f..7ecb342c2 100644
--- a/configs/flux/vae/config.json
+++ b/configs/flux/vae/config.json
@@ -14,7 +14,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
- "force_upcast": true,
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 16,
"latents_mean": null,
diff --git a/configs/sd15/vae/config.json b/configs/sd15/vae/config.json
index 55d78924f..2cba0e824 100644
--- a/configs/sd15/vae/config.json
+++ b/configs/sd15/vae/config.json
@@ -14,6 +14,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
diff --git a/configs/sd3/vae/config.json b/configs/sd3/vae/config.json
index 58e7764fb..f6f4e8684 100644
--- a/configs/sd3/vae/config.json
+++ b/configs/sd3/vae/config.json
@@ -15,7 +15,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
- "force_upcast": true,
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 16,
"latents_mean": null,
diff --git a/configs/sdxl/vae/config.json b/configs/sdxl/vae/config.json
index a66a171ba..1c7a60866 100644
--- a/configs/sdxl/vae/config.json
+++ b/configs/sdxl/vae/config.json
@@ -15,7 +15,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
- "force_upcast": true,
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
diff --git a/modules/shared.py b/modules/shared.py
index aa41a6fd6..068ee8b40 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -432,15 +432,15 @@ def get_default_modes():
cmd_opts.lowvram = True
default_offload_mode = "sequential"
log.info(f"Device detect: memory={gpu_memory:.1f} optimization=lowvram")
- elif gpu_memory <= 8:
- cmd_opts.medvram = True
- default_offload_mode = "model"
- log.info(f"Device detect: memory={gpu_memory:.1f} optimization=medvram")
+ # elif gpu_memory <= 8:
+ # cmd_opts.medvram = True
+ # default_offload_mode = "model"
+ # log.info(f"Device detect: memory={gpu_memory:.1f} optimization=medvram")
else:
- default_offload_mode = "none"
- log.info(f"Device detect: memory={gpu_memory:.1f} optimization=none")
+ default_offload_mode = "balanced"
+ log.info(f"Device detect: memory={gpu_memory:.1f} optimization=balanced")
elif cmd_opts.medvram:
- default_offload_mode = "model"
+ default_offload_mode = "balanced"
elif cmd_opts.lowvram:
default_offload_mode = "sequential"
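Summarizing the new defaults from the hunk above: systems with <=4GB VRAM or `lowvram` fall back to *sequential offload*, while everything else, including `medvram`, now defaults to *balanced offload*. A hedged sketch of that selection logic (the helper name is hypothetical, not the SD.Next function):

```python
# Sketch of the default offload selection after this change.
def get_default_offload_mode(gpu_memory_gb: float, medvram: bool = False, lowvram: bool = False) -> str:
    if lowvram or gpu_memory_gb <= 4:
        return 'sequential'   # low-end systems: sequential offload
    if medvram:
        return 'balanced'     # previously 'model'
    return 'balanced'         # previously 'none' on larger GPUs

print(get_default_offload_mode(24.0))  # 'balanced'
print(get_default_offload_mode(4.0))   # 'sequential'
```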
From 293494841306b971dd75c9c51dd446f1f7abc0b9 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 5 Dec 2024 09:04:42 -0500
Subject: [PATCH 083/162] lora one more safe cast
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 ++--
modules/lora/networks.py | 8 ++------
modules/sd_detect.py | 2 +-
3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d49c1c555..80c5dd474 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-02
+## Update for 2024-12-05
### New models and integrations
@@ -53,7 +53,7 @@
- balanced offload: add both high and low watermark
default is 25% for low-watermark (skip offload if memory usage is below 25%) and 70% high-watermark (must offload if memory usage is above 70%)
- change-in-behavior:
      - `lowvram` triggers *sequential offload*; it is also triggered automatically on systems with <=4GB VRAM
      + low-end systems, triggered either by `lowvram` or by detection of <=4GB VRAM, will use *sequential offload*
        all other systems use *balanced offload* by default (can be changed in settings)
        previous behavior was to use *model offload* on systems with <=8GB or with `medvram`, and no offload by default otherwise
      - VAE upcast is now disabled by default on all systems
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 735c00c4c..20626d9ef 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -369,11 +369,11 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
updown, ex_bias = module.calc_updown(weight)
t1 = time.time()
if batch_updown is not None and updown is not None:
- batch_updown += updown
+ batch_updown += updown.to(batch_updown.device, non_blocking=True)
else:
batch_updown = updown
if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias
+ batch_ex_bias += ex_bias.to(batch_ex_bias.device, non_blocking=True)
else:
batch_ex_bias = ex_bias
timer['calc'] += t1 - t0
@@ -396,10 +396,6 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
errors.display(e, 'LoRA')
raise RuntimeError('LoRA apply weight') from e
continue
- if module is None:
- continue
- shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
return batch_updown, batch_ex_bias
diff --git a/modules/sd_detect.py b/modules/sd_detect.py
index 062bb32e1..071a83d7e 100644
--- a/modules/sd_detect.py
+++ b/modules/sd_detect.py
@@ -105,7 +105,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False):
guess = 'Stable Diffusion XL Instruct'
# get actual pipeline
pipeline = shared_items.get_pipelines().get(guess, None) if pipeline is None else pipeline
- if not quiet:
+ if debug_load is not None:
shared.log.info(f'Autodetect {op}: detect="{guess}" class={getattr(pipeline, "__name__", None)} file="{f}" size={size}MB')
t0 = time.time()
keys = model_tools.get_safetensor_keys(f)
From dda3d2d653bf3a34743b3d04b59891288ceb4faa Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 5 Dec 2024 10:03:17 -0500
Subject: [PATCH 084/162] flux redux allow prompt
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
installer.py | 2 +-
scripts/flux_tools.py | 39 ++++++++++++++++++++++++++++++++-------
3 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80c5dd474..601cba683 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@
*todo*: support for Canny/Depth LoRAs
- [Redux](https://huggingface.co/black-forest-labs/FLUX.1-Redux-dev): ~0.1GB
works together with existing model and basically uses input image to analyze it and use that instead of prompt
      *optional*: can use a prompt to combine text guidance with the input image
*recommended*: low denoise strength levels result in more variety
- [Fill](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev): ~23.8GB, replaces currently loaded model
*note*: can be used in inpaint/outpaint mode only
diff --git a/installer.py b/installer.py
index c1ec1f177..93cd10413 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '63b631f38336f56755fb5cf15d9b0fb70bbf6323' # diffusers commit hash
+ sha = '3335e2262d47e7d7e311a44dea7f454b5f01b643' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
index 3fbab6c6f..909257a37 100644
--- a/scripts/flux_tools.py
+++ b/scripts/flux_tools.py
@@ -26,11 +26,21 @@ def ui(self, _is_img2img): # ui elements
with gr.Row():
tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None')
with gr.Row():
- process = gr.Checkbox(label='Preprocess input images', value=True)
- strength = gr.Checkbox(label='Override denoise strength', value=True)
- return [tool, strength, process]
+ prompt = gr.Slider(label='Redux prompt strength', minimum=0, maximum=2, step=0.01, value=0, visible=False)
+ process = gr.Checkbox(label='Control preprocess input images', value=True, visible=False)
+ strength = gr.Checkbox(label='Control override denoise strength', value=True, visible=False)
- def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True, process: bool = True): # pylint: disable=arguments-differ
+ def display(tool):
+ return [
+ gr.update(visible=tool in ['Redux']),
+ gr.update(visible=tool in ['Canny', 'Depth']),
+ gr.update(visible=tool in ['Canny', 'Depth']),
+ ]
+
+ tool.change(fn=display, inputs=[tool], outputs=[prompt, process, strength])
+ return [tool, prompt, strength, process]
+
+ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', prompt: float = 1.0, strength: bool = True, process: bool = True): # pylint: disable=arguments-differ
global redux_pipe, processor_canny, processor_depth # pylint: disable=global-statement
if tool is None or tool == 'None':
return
@@ -50,6 +60,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
t0 = time.time()
if tool == 'Redux':
# pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained("black-forest-labs/FLUX.1-Redux-dev", revision="refs/pr/8", torch_dtype=torch.bfloat16).to("cuda")
+ shared.log.debug(f'{title}: tool={tool} prompt={prompt}')
if redux_pipe is None:
redux_pipe = diffusers.FluxPriorReduxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Redux-dev",
@@ -57,7 +68,21 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
torch_dtype=devices.dtype,
cache_dir=shared.opts.hfcache_dir
).to(devices.device)
- redux_output = redux_pipe(image)
+ if prompt > 0:
+ shared.log.info(f'{title}: tool={tool} load text encoder')
+ redux_pipe.tokenizer, redux_pipe.tokenizer_2 = shared.sd_model.tokenizer, shared.sd_model.tokenizer_2
+ redux_pipe.text_encoder, redux_pipe.text_encoder_2 = shared.sd_model.text_encoder, shared.sd_model.text_encoder_2
+ sd_models.apply_balanced_offload(redux_pipe)
+ redux_output = redux_pipe(
+ image=image,
+ prompt=p.prompt if prompt > 0 else None,
+ prompt_embeds_scale=[prompt],
+ pooled_prompt_embeds_scale=[prompt],
+ )
+ if prompt > 0:
+ redux_pipe.tokenizer, redux_pipe.tokenizer_2 = None, None
+ redux_pipe.text_encoder, redux_pipe.text_encoder_2 = None, None
+ devices.torch_gc()
for k, v in redux_output.items():
p.task_args[k] = v
else:
@@ -77,7 +102,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
p.task_args['mask_image'] = p.image_mask
if tool == 'Canny':
- # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ # pipe = diffusers.FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
install('controlnet-aux')
install('timm==0.9.16')
if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Canny' not in shared.opts.sd_model_checkpoint:
@@ -99,7 +124,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
processor_canny = None
if tool == 'Depth':
- # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ # pipe = diffusers.FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
install('git+https://github.com/huggingface/image_gen_aux.git', 'image_gen_aux')
if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint:
shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev"
From 16ab1e2ae88add2ccc2a53648e55af8f00fe3d97 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 07:23:31 -0500
Subject: [PATCH 085/162] safer lora unapply
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 2 +-
modules/lora/networks.py | 19 ++++++++++---------
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601cba683..be9a573ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-05
+## Update for 2024-12-06
### New models and integrations
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 20626d9ef..f1fdb0c45 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -334,6 +334,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
if shared.opts.lora_offload_backup and weights_backup is not None and isinstance(weights_backup, torch.Tensor):
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
+
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
@@ -380,12 +381,9 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.diffusers_offload_mode != "none":
t0 = time.time()
if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
+ batch_updown = batch_updown.to(devices.cpu)
if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
- if devices.backend == "ipex":
- # using non_blocking=True here causes NaNs on Intel
- torch.xpu.synchronize(devices.device)
+ batch_ex_bias = batch_ex_bias.to(devices.cpu)
t1 = time.time()
timer['move'] += t1 - t0
except RuntimeError as e:
@@ -405,6 +403,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return None, None
+
if weights_backup is not None:
if isinstance(weights_backup, bool):
weights_backup = self.weight
@@ -417,12 +416,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.weight = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
else:
- self.weight = torch.nn.Parameter(weights_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.weight = torch.nn.Parameter(weights_backup.to(device=orig_device), requires_grad=False)
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
+
if bias_backup is not None:
if isinstance(bias_backup, bool):
bias_backup = self.bias
@@ -430,12 +430,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
self.bias = None
if ex_bias is not None:
new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
- self.bias = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.bias = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
else:
- self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device), requires_grad=False)
else:
self.bias = None
+
t1 = time.time()
timer['apply'] += t1 - t0
return self.weight.device, self.weight.dtype
From 2a1fbd904fb871d1fbd911c9455ccbf26e56eb10 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 07:36:20 -0500
Subject: [PATCH 086/162] handle os err
Signed-off-by: Vladimir Mandic
---
modules/memstats.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/memstats.py b/modules/memstats.py
index 7836f7636..fd5f152a0 100644
--- a/modules/memstats.py
+++ b/modules/memstats.py
@@ -20,10 +20,10 @@ def memory_stats():
mem.update({ 'ram': ram })
except Exception as e:
if not fail_once:
- shared.log.error('Memory stats: {e}')
+ shared.log.error(f'Memory stats: {e}')
errors.display(e, 'Memory stats')
fail_once = True
- mem.update({ 'ram': str(e) })
+ mem.update({ 'ram': { 'error': str(e) } })
try:
s = torch.cuda.mem_get_info()
gpu = { 'used': gb(s[1] - s[0]), 'total': gb(s[1]) }
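The fix above adds the missing f-string prefix so the exception text is actually interpolated, and records the failure as a structured entry rather than a bare string. A self-contained sketch of the resulting error-handling shape (the `psutil`-based RAM lookup is an assumption about how the stats are gathered, not the exact SD.Next code):

```python
# Sketch of the memstats error handling after the fix above.
import logging

log = logging.getLogger('sd')
fail_once = False

def memory_stats() -> dict:
    global fail_once
    mem = {}
    try:
        import psutil  # assumption: a psutil-style lookup provides the RAM numbers
        process = psutil.Process()
        mem['ram'] = {'used': round(process.memory_info().rss / 1024**3, 2)}
    except Exception as e:
        if not fail_once:
            log.error(f'Memory stats: {e}')   # f-string, so the exception text is interpolated
            fail_once = True
        mem['ram'] = {'error': str(e)}        # structured error instead of a bare string
    return mem

print(memory_stats())
```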
From 28eeb00432f682d38134dc1f65e67ca2728c9e79 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 09:15:37 -0500
Subject: [PATCH 087/162] remove non-blocking
Signed-off-by: Vladimir Mandic
---
cli/load-unet.py | 4 ++--
modules/lora/networks.py | 12 ++++++------
modules/processing_vae.py | 2 +-
modules/rife/__init__.py | 4 ++--
modules/sd_hijack_accelerate.py | 8 ++++----
modules/sd_models.py | 2 +-
6 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/cli/load-unet.py b/cli/load-unet.py
index 2398cdb64..c910101b0 100644
--- a/cli/load-unet.py
+++ b/cli/load-unet.py
@@ -33,13 +33,13 @@ def set_module_tensor(
stats.dtypes[value.dtype] = 0
stats.dtypes[value.dtype] += 1
if name in module._buffers: # pylint: disable=protected-access
- module._buffers[name] = value.to(device=device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
+ module._buffers[name] = value.to(device=device, dtype=dtype) # pylint: disable=protected-access
if 'buffers' not in stats.weights:
stats.weights['buffers'] = 0
stats.weights['buffers'] += 1
elif value is not None:
param_cls = type(module._parameters[name]) # pylint: disable=protected-access
- module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
+ module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype) # pylint: disable=protected-access
if 'parameters' not in stats.weights:
stats.weights['parameters'] = 0
stats.weights['parameters'] += 1
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index f1fdb0c45..5a093370c 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -359,7 +359,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
- self.to(devices.device, non_blocking=True)
+ self.to(devices.device)
batch_updown = None
batch_ex_bias = None
for net in loaded_networks:
@@ -370,11 +370,11 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
updown, ex_bias = module.calc_updown(weight)
t1 = time.time()
if batch_updown is not None and updown is not None:
- batch_updown += updown.to(batch_updown.device, non_blocking=True)
+ batch_updown += updown.to(batch_updown.device)
else:
batch_updown = updown
if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias.to(batch_ex_bias.device, non_blocking=True)
+ batch_ex_bias += ex_bias.to(batch_ex_bias.device)
else:
batch_ex_bias = ex_bias
timer['calc'] += t1 - t0
@@ -412,7 +412,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
+ new_weight = weights_backup.to(devices.device) + updown.to(devices.device)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -429,7 +429,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
self.bias = None
if ex_bias is not None:
- new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
+ new_weight = bias_backup.to(devices.device) + ex_bias.to(devices.device)
self.bias = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
else:
@@ -482,7 +482,7 @@ def network_activate():
pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
continue
weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
+ weight = weight.to(devices.device) if weight is not None else None
backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index b114e01d3..1c4a45f07 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -117,7 +117,7 @@ def full_vae_decode(latents, model):
model.vae.orig_dtype = model.vae.dtype
model.vae = model.vae.to(dtype=torch.float32)
latents = latents.to(torch.float32)
- latents = latents.to(devices.device, non_blocking=True)
+ latents = latents.to(devices.device)
if getattr(model.vae, "post_quant_conv", None) is not None:
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
diff --git a/modules/rife/__init__.py b/modules/rife/__init__.py
index f74f3d984..2a636eb2f 100644
--- a/modules/rife/__init__.py
+++ b/modules/rife/__init__.py
@@ -82,13 +82,13 @@ def f_pad(img):
for _i in range(pad): # fill starting frames
buffer.put(frame)
- I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device, non_blocking=True).unsqueeze(0).float() / 255.)
+ I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device).unsqueeze(0).float() / 255.)
with torch.no_grad():
with tqdm(total=len(images), desc='Interpolate', unit='frame') as pbar:
for image in images:
frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
I0 = I1
- I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device, non_blocking=True).unsqueeze(0).float() / 255.)
+ I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device).unsqueeze(0).float() / 255.)
I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False).to(torch.float32)
I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False).to(torch.float32)
ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
diff --git a/modules/sd_hijack_accelerate.py b/modules/sd_hijack_accelerate.py
index 90eac5c4e..f8cf8983f 100644
--- a/modules/sd_hijack_accelerate.py
+++ b/modules/sd_hijack_accelerate.py
@@ -35,10 +35,10 @@ def hijack_set_module_tensor(
with devices.inference_context():
# note: majority of time is spent on .to(old_value.dtype)
if tensor_name in module._buffers: # pylint: disable=protected-access
- module._buffers[tensor_name] = value.to(device, old_value.dtype, non_blocking=True) # pylint: disable=protected-access
+ module._buffers[tensor_name] = value.to(device, old_value.dtype) # pylint: disable=protected-access
elif value is not None or not devices.same_device(torch.device(device), module._parameters[tensor_name].device): # pylint: disable=protected-access
param_cls = type(module._parameters[tensor_name]) # pylint: disable=protected-access
- module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, old_value.dtype, non_blocking=True) # pylint: disable=protected-access
+ module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, old_value.dtype) # pylint: disable=protected-access
t1 = time.time()
tensor_to_timer += (t1 - t0)
@@ -63,10 +63,10 @@ def hijack_set_module_tensor_simple(
old_value = getattr(module, tensor_name)
with devices.inference_context():
if tensor_name in module._buffers: # pylint: disable=protected-access
- module._buffers[tensor_name] = value.to(device, non_blocking=True) # pylint: disable=protected-access
+ module._buffers[tensor_name] = value.to(device) # pylint: disable=protected-access
elif value is not None or not devices.same_device(torch.device(device), module._parameters[tensor_name].device): # pylint: disable=protected-access
param_cls = type(module._parameters[tensor_name]) # pylint: disable=protected-access
- module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, non_blocking=True) # pylint: disable=protected-access
+ module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device) # pylint: disable=protected-access
t1 = time.time()
tensor_to_timer += (t1 - t0)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 63ec6b327..8853916e4 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -529,7 +529,7 @@ def move_model(model, device=None, force=False):
t0 = time.time()
try:
if hasattr(model, 'to'):
- model.to(device, non_blocking=True)
+ model.to(device)
if hasattr(model, "prior_pipe"):
model.prior_pipe.to(device)
except Exception as e0:
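Dropping `non_blocking=True` trades a little copy overhead for correctness: an asynchronous device-to-host copy is only safe to read after an explicit synchronize (the removed IPEX comment noted NaNs from exactly this), whereas a plain `.to()` blocks until the data is valid. A small sketch of the difference:

```python
# Sketch of why the non_blocking flags were dropped: async copies need an
# explicit synchronize before the destination tensor can be trusted.
import torch

if torch.cuda.is_available():
    t_gpu = torch.randn(1024, device='cuda')
    # asynchronous copy: must synchronize before reading the CPU tensor
    t_async = t_gpu.to('cpu', non_blocking=True)
    torch.cuda.synchronize()
    # synchronous copy: no extra synchronization needed, which is what the code now relies on
    t_sync = t_gpu.to('cpu')
    print(torch.equal(t_async, t_sync))
```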
From 461be710502a65684c7e1ea3f84549392c09560c Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 13:51:25 -0500
Subject: [PATCH 088/162] update wiki
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
wiki | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be9a573ea..b3aaec73f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,6 +72,7 @@
### Updates
+- Additional Wiki content: Styles, Wildcards, etc.
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
diff --git a/wiki b/wiki
index 2a83f725b..c5d484397 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 2a83f725bda6a81399f579ba7102741f71b0be39
+Subproject commit c5d484397f7504fdea098d5e24c843a69c9fd2a2
From 6c9101dfaf0e29f0bd55f702cffd53f9e71a0347 Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Fri, 6 Dec 2024 22:54:08 -0600
Subject: [PATCH 089/162] lora low memory mode: switching requires manual model
reload
---
modules/lora/networks.py | 67 +++++++++++++++++++++++++++++++++++++---
modules/shared.py | 1 +
2 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 5a093370c..9618f01a9 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -316,7 +316,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
self.network_weights_backup = None
- if shared.opts.lora_fuse_diffusers:
+ if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
weights_backup = True
elif getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
if bnb is None:
@@ -338,7 +338,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
- if shared.opts.lora_fuse_diffusers:
+ if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
bias_backup = True
else:
bias_backup = self.bias.clone()
@@ -397,7 +397,7 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device, deactivate: bool = False):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -412,6 +412,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
+ if deactivate:
+ updown *= -1
new_weight = weights_backup.to(devices.device) + updown.to(devices.device)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
@@ -429,6 +431,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
self.bias = None
if ex_bias is not None:
+ if deactivate:
+ ex_bias *= -1
new_weight = bias_backup.to(devices.device) + ex_bias.to(devices.device)
self.bias = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
@@ -443,7 +447,62 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
def network_deactivate():
- pass
+ if not shared.opts.lora_low_memory:
+ return
+ timer['deactivate'] = 0
+ t0 = time.time()
+ sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.disable_offload(sd_model)
+ sd_models.move_model(sd_model, device=devices.cpu)
+ modules = {}
+ for component_name in ['text_encoder', 'text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None and hasattr(component, 'named_modules'):
+ modules[component_name] = list(component.named_modules())
+ total = sum(len(x) for x in modules.values())
+ if len(loaded_networks) > 0:
+ pbar = rp.Progress(rp.TextColumn('[cyan]Deactivate network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(),
+ rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'),
+ console=shared.console)
+ task = pbar.add_task(description='', total=total)
+ else:
+ task = None
+ pbar = nullcontext()
+ with devices.inference_context(), pbar:
+ applied = 0
+ weights_devices = []
+ weights_dtypes = []
+ for component in modules.keys():
+ orig_device = getattr(sd_model, component, None).device
+ for _, module in modules[component]:
+ network_layer_name = getattr(module, 'network_layer_name', None)
+ if shared.state.interrupted or network_layer_name is None:
+ if task is not None:
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ continue
+ weight = getattr(module, 'weight', None)
+ weight = weight.to(devices.device) if weight is not None else None
+ batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device, deactivate=True)
+ weights_devices.append(weights_device)
+ weights_dtypes.append(weights_dtype)
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
+ del weight, batch_updown, batch_ex_bias
+ module.network_current_names = ()
+ if task is not None:
+ pbar.update(task, advance=1,
+ description=f'networks={len(loaded_networks)} modules={len(modules)} deactivate={applied}')
+ weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
+ if debug and len(loaded_networks) > 0:
+ shared.log.debug(
+ f'Deactivate network: type=LoRA networks={len(loaded_networks)} modules={total} deactivate={applied} device={weights_devices} dtype={weights_dtypes} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ modules.clear()
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.set_diffuser_offload(sd_model, op="model")
+ t1 = time.time()
+ timer['deactivate'] += t1 - t0
def network_activate():
timer['backup'] = 0
diff --git a/modules/shared.py b/modules/shared.py
index 068ee8b40..f8a989270 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -913,6 +913,7 @@ def get_default_modes():
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
+ "lora_low_memory": OptionInfo(False, "LoRA low memory mode"),
}))
options_templates.update(options_section((None, "Internal options"), {
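In the new low-memory mode the weight backup is just a boolean flag, so `network_deactivate` cannot restore a saved copy; instead it recomputes the same delta and applies it with the sign flipped, as the `updown *= -1` lines above show. A toy sketch of that activate/deactivate symmetry (`apply_lora` is a hypothetical helper, not the SD.Next function):

```python
# Sketch of deactivate-by-negation: unapplying a LoRA means adding the same
# delta with the sign flipped rather than restoring a stored weight copy.
import torch

def apply_lora(weight: torch.Tensor, updown: torch.Tensor, deactivate: bool = False) -> torch.Tensor:
    if deactivate:
        updown = -updown                 # reverse the previously applied delta
    return weight + updown

base = torch.randn(4, 4)
delta = 0.1 * torch.randn(4, 4)
patched = apply_lora(base, delta)                        # activate
restored = apply_lora(patched, delta, deactivate=True)   # deactivate without a backup
print(torch.allclose(base, restored))  # True up to floating-point rounding
```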
From f346cccb518d2e0b0c52895e93155f1f49ed8ec4 Mon Sep 17 00:00:00 2001
From: QuantumSoul
Date: Sat, 7 Dec 2024 14:13:16 +0100
Subject: [PATCH 090/162] Create mkdocs.yml
---
mkdocs.yml | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
create mode 100644 mkdocs.yml
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 000000000..7fdeb24db
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,49 @@
+site_name: SD.Next Documentation
+site_url: https://vladmandic.github.io/automatic
+repo_url: https://github.com/vladmandic/automatic
+repo_name: vladmandic/automatic
+docs_dir: wiki
+
+theme:
+ name: material
+
+ features:
+ - navigation.footer
+ - navigation.instant
+ - navigation.instant.prefetch
+ - navigation.instant.progress
+ - navigation.tracking
+
+ palette:
+ - media: "(prefers-color-scheme: light)"
+ scheme: default
+ toggle:
+ icon: material/weather-night
+ name: Switch to dark mode
+ primary: teal
+ accent: pink
+
+ - media: "(prefers-color-scheme: dark)"
+ scheme: slate
+ toggle:
+ icon: material/weather-sunny
+ name: Switch to light mode
+ primary: green
+ accent: yellow
+
+ logo: assets/favicon.svg
+ favicon: assets/favicon.svg
+
+extra:
+ social:
+ - icon: fontawesome/brands/discord
+ link: https://discord.gg/VjvR2tabEX
+ name: Discord
+ - icon: fontawesome/brands/youtube
+ link: https://www.youtube.com/@SDNext
+ name: Youtube
+
+markdown_extensions:
+ - admonition
+ - pymdownx.details
+ - pymdownx.superfences
\ No newline at end of file
From 1185950c4ae648175f899cc8e87c539cf042af43 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 13:40:19 -0500
Subject: [PATCH 091/162] yet another lora refactor
Signed-off-by: Vladimir Mandic
---
extensions-builtin/Lora/network_lora.py | 3 -
mkdocs.yml | 49 -----
modules/extra_networks.py | 16 +-
modules/face/faceid.py | 10 +-
modules/lora/extra_networks_lora.py | 8 +-
modules/lora/lora_timers.py | 38 ++++
modules/lora/network_lora.py | 3 -
modules/lora/networks.py | 240 ++++++++++++------------
modules/processing.py | 14 +-
modules/processing_args.py | 1 +
modules/processing_callbacks.py | 2 +-
modules/processing_class.py | 1 +
modules/processing_diffusers.py | 15 +-
modules/prompt_parser_diffusers.py | 2 +-
modules/shared.py | 5 +-
wiki | 2 +-
16 files changed, 193 insertions(+), 216 deletions(-)
delete mode 100644 mkdocs.yml
create mode 100644 modules/lora/lora_timers.py
diff --git a/extensions-builtin/Lora/network_lora.py b/extensions-builtin/Lora/network_lora.py
index 5e6eaef6c..8ebda2e22 100644
--- a/extensions-builtin/Lora/network_lora.py
+++ b/extensions-builtin/Lora/network_lora.py
@@ -22,7 +22,6 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.dim = weights.w["lora_down.weight"].shape[0]
def create_module(self, weights, key, none_ok=False):
- from modules.shared import opts
weight = weights.get(key)
if weight is None and none_ok:
return None
@@ -49,8 +48,6 @@ def create_module(self, weights, key, none_ok=False):
if weight.shape != module.weight.shape:
weight = weight.reshape(module.weight.shape)
module.weight.copy_(weight)
- if opts.lora_load_gpu:
- module = module.to(device=devices.device, dtype=devices.dtype)
module.weight.requires_grad_(False)
return module
diff --git a/mkdocs.yml b/mkdocs.yml
deleted file mode 100644
index 7fdeb24db..000000000
--- a/mkdocs.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-site_name: SD.Next Documentation
-site_url: https://vladmandic.github.io/automatic
-repo_url: https://github.com/vladmandic/automatic
-repo_name: vladmandic/automatic
-docs_dir: wiki
-
-theme:
- name: material
-
- features:
- - navigation.footer
- - navigation.instant
- - navigation.instant.prefetch
- - navigation.instant.progress
- - navigation.tracking
-
- palette:
- - media: "(prefers-color-scheme: light)"
- scheme: default
- toggle:
- icon: material/weather-night
- name: Switch to dark mode
- primary: teal
- accent: pink
-
- - media: "(prefers-color-scheme: dark)"
- scheme: slate
- toggle:
- icon: material/weather-sunny
- name: Switch to light mode
- primary: green
- accent: yellow
-
- logo: assets/favicon.svg
- favicon: assets/favicon.svg
-
-extra:
- social:
- - icon: fontawesome/brands/discord
- link: https://discord.gg/VjvR2tabEX
- name: Discord
- - icon: fontawesome/brands/youtube
- link: https://www.youtube.com/@SDNext
- name: Youtube
-
-markdown_extensions:
- - admonition
- - pymdownx.details
- - pymdownx.superfences
\ No newline at end of file
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index fca48e21c..e96d2e5b7 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -74,9 +74,12 @@ def is_stepwise(en_obj):
return any([len(str(x).split("@")) > 1 for x in all_args]) # noqa C419 # pylint: disable=use-a-generator
-def activate(p, extra_network_data, step=0):
+def activate(p, extra_network_data=None, step=0):
"""call activate for extra networks in extra_network_data in specified order, then call activate for all remaining registered networks with an empty argument list"""
- if extra_network_data is None:
+ if p.disable_extra_networks:
+ return
+ extra_network_data = extra_network_data or p.network_data
+ if extra_network_data is None or len(extra_network_data) == 0:
return
stepwise = False
for extra_network_args in extra_network_data.values():
@@ -106,15 +109,18 @@ def activate(p, extra_network_data, step=0):
except Exception as e:
errors.display(e, f"Activating network: type={extra_network_name}")
- p.extra_network_data = extra_network_data
+ p.network_data = extra_network_data
if stepwise:
p.stepwise_lora = True
shared.opts.data['lora_functional'] = functional
-def deactivate(p, extra_network_data):
+def deactivate(p, extra_network_data=None):
"""call deactivate for extra networks in extra_network_data in specified order, then call deactivate for all remaining registered networks"""
- if extra_network_data is None:
+ if p.disable_extra_networks:
+ return
+ extra_network_data = extra_network_data or p.network_data
+ if extra_network_data is None or len(extra_network_data) == 0:
return
for extra_network_name in extra_network_data:
extra_network = extra_network_registry.get(extra_network_name, None)
diff --git a/modules/face/faceid.py b/modules/face/faceid.py
index b74e15dc5..4a4f07531 100644
--- a/modules/face/faceid.py
+++ b/modules/face/faceid.py
@@ -204,7 +204,6 @@ def face_id(
ip_model_dict["face_image"] = face_images
ip_model_dict["faceid_embeds"] = face_embeds # overwrite placeholder
faceid_model.set_scale(scale)
- extra_network_data = None
if p.all_prompts is None or len(p.all_prompts) == 0:
processing.process_init(p)
@@ -215,11 +214,9 @@ def face_id(
p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n+1) * p.batch_size]
p.seeds = p.all_seeds[n * p.batch_size:(n+1) * p.batch_size]
p.subseeds = p.all_subseeds[n * p.batch_size:(n+1) * p.batch_size]
- p.prompts, extra_network_data = extra_networks.parse_prompts(p.prompts)
+ p.prompts, p.network_data = extra_networks.parse_prompts(p.prompts)
- if not p.disable_extra_networks:
- with devices.autocast():
- extra_networks.activate(p, extra_network_data)
+ extra_networks.activate(p, p.network_data)
ip_model_dict.update({
"prompt": p.prompts[0],
"negative_prompt": p.negative_prompts[0],
@@ -239,8 +236,7 @@ def face_id(
devices.torch_gc()
ipadapter.unapply(p.sd_model)
- if not p.disable_extra_networks:
- extra_networks.deactivate(p, extra_network_data)
+ extra_networks.deactivate(p, p.network_data)
p.extra_generation_params["IP Adapter"] = f"{basename}:{scale}"
finally:
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 57966550a..4ce7a94a9 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -1,5 +1,4 @@
import re
-import time
import numpy as np
import modules.lora.networks as networks
from modules import extra_networks, shared
@@ -128,10 +127,9 @@ def activate(self, p, params_list, step=0):
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
- shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.get_timers()}')
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
def deactivate(self, p):
- t0 = time.time()
if shared.native and len(networks.diffuser_loaded) > 0:
if hasattr(shared.sd_model, "unload_lora_weights") and hasattr(shared.sd_model, "text_encoder"):
if not (shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled is True):
@@ -142,10 +140,8 @@ def deactivate(self, p):
except Exception:
pass
networks.network_deactivate()
- t1 = time.time()
- networks.timer['restore'] += t1 - t0
if self.active and networks.debug:
- shared.log.debug(f"Network end: type=LoRA load={networks.timer['load']:.2f} apply={networks.timer['apply']:.2f} restore={networks.timer['restore']:.2f}")
+ shared.log.debug(f"Network end: type=LoRA time={networks.timer.summary}")
if self.errors:
for k, v in self.errors.items():
shared.log.error(f'LoRA: name="{k}" errors={v}')
diff --git a/modules/lora/lora_timers.py b/modules/lora/lora_timers.py
new file mode 100644
index 000000000..30c35a728
--- /dev/null
+++ b/modules/lora/lora_timers.py
@@ -0,0 +1,38 @@
+class Timer():
+ list: float = 0
+ load: float = 0
+ backup: float = 0
+ calc: float = 0
+ apply: float = 0
+ move: float = 0
+ restore: float = 0
+ activate: float = 0
+ deactivate: float = 0
+
+ @property
+ def total(self):
+ return round(self.activate + self.deactivate, 2)
+
+ @property
+ def summary(self):
+ t = {}
+ for k, v in self.__dict__.items():
+ if v > 0.1:
+ t[k] = round(v, 2)
+ return t
+
+ def clear(self, complete: bool = False):
+ self.backup = 0
+ self.calc = 0
+ self.apply = 0
+ self.move = 0
+ self.restore = 0
+ if complete:
+ self.activate = 0
+ self.deactivate = 0
+
+ def add(self, name, t):
+ self.__dict__[name] += t
+
+ def __str__(self):
+ return f'{self.__class__.__name__}({self.summary})'
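The Timer class above replaces the module-level timer dict: phases are plain float attributes that callers update with +=, summary reports only phases above 0.1s, and total is activate plus deactivate. A minimal behavioural sketch with made-up durations (nothing below is part of the patch):

from modules.lora.lora_timers import Timer  # module path as introduced by this patch

timer = Timer()
timer.load = 0.8          # networks.py sets timer.load = time.time() - t0
timer.backup += 0.25      # per-phase accumulation uses plain attribute +=
timer.activate = 1.3
print(timer.summary)      # {'load': 0.8, 'backup': 0.25, 'activate': 1.3} -- phases at or below 0.1s are hidden
print(timer.total)        # 1.3 (activate + deactivate, rounded to 2 decimals)
print(timer)              # Timer({...}) via __str__
timer.clear()             # resets backup/calc/apply/move/restore; clear(complete=True) also resets activate/deactivate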
diff --git a/modules/lora/network_lora.py b/modules/lora/network_lora.py
index 6c1d7ea3f..8bf475ebc 100644
--- a/modules/lora/network_lora.py
+++ b/modules/lora/network_lora.py
@@ -22,7 +22,6 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.dim = weights.w["lora_down.weight"].shape[0]
def create_module(self, weights, key, none_ok=False):
- from modules.shared import opts
weight = weights.get(key)
if weight is None and none_ok:
return None
@@ -49,8 +48,6 @@ def create_module(self, weights, key, none_ok=False):
if weight.shape != module.weight.shape:
weight = weight.reshape(module.weight.shape)
module.weight.copy_(weight)
- if opts.lora_load_gpu:
- module = module.to(device=devices.device, dtype=devices.dtype)
module.weight.requires_grad_(False)
return module
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 9618f01a9..805b24b52 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -1,24 +1,15 @@
from typing import Union, List
+from contextlib import nullcontext
import os
import re
import time
import concurrent
-from contextlib import nullcontext
import torch
import diffusers.models.lora
import rich.progress as rp
-import modules.lora.network as network
-import modules.lora.network_lora as network_lora
-import modules.lora.network_hada as network_hada
-import modules.lora.network_ia3 as network_ia3
-import modules.lora.network_oft as network_oft
-import modules.lora.network_lokr as network_lokr
-import modules.lora.network_full as network_full
-import modules.lora.network_norm as network_norm
-import modules.lora.network_glora as network_glora
-import modules.lora.network_overrides as network_overrides
-import modules.lora.lora_convert as lora_convert
+from modules.lora import lora_timers, network, lora_convert, network_overrides
+from modules.lora import network_lora, network_hada, network_ia3, network_oft, network_lokr, network_full, network_norm, network_glora
from modules.lora.extra_networks_lora import ExtraNetworkLora
from modules import shared, devices, sd_models, sd_models_compile, errors, files_cache, model_quant
@@ -28,7 +19,6 @@
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
-timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'move': 0, 'restore': 0, 'deactivate': 0 }
bnb = None
lora_cache = {}
diffuser_loaded = []
@@ -36,6 +26,7 @@
available_network_hash_lookup = {}
forbidden_network_aliases = {}
re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
+timer = lora_timers.Timer()
module_types = [
network_lora.ModuleTypeLora(),
network_hada.ModuleTypeHada(),
@@ -47,19 +38,6 @@
network_glora.ModuleTypeGLora(),
]
-
-def total_time():
- return sum(timer.values())
-
-
-def get_timers():
- t = { 'total': round(sum(timer.values()), 2) }
- for k, v in timer.items():
- if v > 0.1:
- t[k] = round(v, 2)
- return t
-
-
# section: load networks from disk
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
@@ -154,7 +132,7 @@ def load_safetensors(name, network_on_disk) -> Union[network.Network, None]:
if debug:
shared.log.debug(f'LoRA name="{name}" unmatched={keys_failed_to_match}')
else:
- shared.log.debug(f'LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)}')
+ shared.log.debug(f'LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} direct={shared.opts.lora_fuse_diffusers}')
if len(matched_networks) == 0:
return None
lora_cache[name] = net
@@ -222,12 +200,11 @@ def add_network(filename):
for fn in candidates:
executor.submit(add_network, fn)
t1 = time.time()
- timer['list'] = t1 - t0
+ timer.list = t1 - t0
shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
- timer['list'] = 0
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@@ -301,13 +278,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
if len(loaded_networks) > 0:
devices.torch_gc()
- t1 = time.time()
- timer['load'] = t1 - t0
+ timer.load = time.time() - t0
# section: process loaded networks
-def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str, wanted_names: tuple):
+def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], network_layer_name: str, wanted_names: tuple):
global bnb # pylint: disable=W0603
backup_size = 0
if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
@@ -315,9 +291,10 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
+ weight = getattr(self, 'weight', None)
self.network_weights_backup = None
- if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
- weights_backup = True
+ if shared.opts.lora_fuse_diffusers:
+ self.network_weights_backup = True
elif getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
if bnb is None:
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
@@ -329,86 +306,112 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
self.blocksize = weight.blocksize
else:
weights_backup = weight.clone()
+ weights_backup = weights_backup.to(devices.cpu)
else:
weights_backup = weight.clone()
- if shared.opts.lora_offload_backup and weights_backup is not None and isinstance(weights_backup, torch.Tensor):
weights_backup = weights_backup.to(devices.cpu)
- self.network_weights_backup = weights_backup
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
- if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
- bias_backup = True
+ if shared.opts.lora_fuse_diffusers:
+ self.network_bias_backup = True
else:
bias_backup = self.bias.clone()
- else:
- bias_backup = None
- if shared.opts.lora_offload_backup and bias_backup is not None and isinstance(bias_backup, torch.Tensor):
- bias_backup = bias_backup.to(devices.cpu)
- self.network_bias_backup = bias_backup
+ bias_backup = bias_backup.to(devices.cpu)
if getattr(self, 'network_weights_backup', None) is not None:
backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size() if isinstance(self.network_weights_backup, torch.Tensor) else 0
if getattr(self, 'network_bias_backup', None) is not None:
backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size() if isinstance(self.network_bias_backup, torch.Tensor) else 0
- t1 = time.time()
- timer['backup'] += t1 - t0
+ timer.backup += time.time() - t0
return backup_size
-def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str):
+def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
self.to(devices.device)
batch_updown = None
batch_ex_bias = None
for net in loaded_networks:
module = net.modules.get(network_layer_name, None)
- if module is not None and hasattr(self, 'weight'):
- try:
+ if module is None:
+ continue
+ try:
+ t0 = time.time()
+ weight = self.weight.to(devices.device)
+ updown, ex_bias = module.calc_updown(weight)
+ if batch_updown is not None and updown is not None:
+ batch_updown += updown.to(batch_updown.device)
+ else:
+ batch_updown = updown
+ if batch_ex_bias is not None and ex_bias is not None:
+ batch_ex_bias += ex_bias.to(batch_ex_bias.device)
+ else:
+ batch_ex_bias = ex_bias
+ timer.calc += time.time() - t0
+ if shared.opts.diffusers_offload_mode == "sequential":
t0 = time.time()
- updown, ex_bias = module.calc_updown(weight)
+ if batch_updown is not None:
+ batch_updown = batch_updown.to(devices.cpu)
+ if batch_ex_bias is not None:
+ batch_ex_bias = batch_ex_bias.to(devices.cpu)
t1 = time.time()
- if batch_updown is not None and updown is not None:
- batch_updown += updown.to(batch_updown.device)
- else:
- batch_updown = updown
- if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias.to(batch_ex_bias.device)
- else:
- batch_ex_bias = ex_bias
- timer['calc'] += t1 - t0
- if shared.opts.diffusers_offload_mode != "none":
- t0 = time.time()
- if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu)
- if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu)
- t1 = time.time()
- timer['move'] += t1 - t0
- except RuntimeError as e:
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if debug:
- module_name = net.modules.get(network_layer_name, None)
- shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
- errors.display(e, 'LoRA')
- raise RuntimeError('LoRA apply weight') from e
- continue
+ timer.move += t1 - t0
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device, deactivate: bool = False):
+def network_apply_direct(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, deactivate: bool = False):
+ weights_backup = getattr(self, "network_weights_backup", False)
+ bias_backup = getattr(self, "network_bias_backup", False)
+ if not weights_backup and not bias_backup:
+ return None, None
t0 = time.time()
+
+ if weights_backup:
+ if updown is not None and len(self.weight.shape) == 4 and self.weight.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
+ updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
+ if updown is not None:
+ if deactivate:
+ updown *= -1
+ new_weight = self.weight.to(devices.device) + updown.to(devices.device)
+ if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
+ self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
+ else:
+ self.weight = torch.nn.Parameter(new_weight, requires_grad=False)
+ del new_weight
+ if hasattr(self, "qweight") and hasattr(self, "freeze"):
+ self.freeze()
+
+ if bias_backup:
+ if ex_bias is not None:
+ if deactivate:
+ ex_bias *= -1
+ new_weight = bias_backup.to(devices.device) + ex_bias.to(devices.device)
+ self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
+ del new_weight
+
+ timer.apply += time.time() - t0
+ return self.weight.device, self.weight.dtype
+
+
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device, deactivate: bool = False):
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return None, None
+ t0 = time.time()
if weights_backup is not None:
- if isinstance(weights_backup, bool):
- weights_backup = self.weight
- else:
- self.weight = None
+ self.weight = None
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
@@ -426,10 +429,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
self.freeze()
if bias_backup is not None:
- if isinstance(bias_backup, bool):
- bias_backup = self.bias
- else:
- self.bias = None
+ self.bias = None
if ex_bias is not None:
if deactivate:
ex_bias *= -1
@@ -438,19 +438,16 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
del new_weight
else:
self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device), requires_grad=False)
- else:
- self.bias = None
- t1 = time.time()
- timer['apply'] += t1 - t0
+ timer.apply += time.time() - t0
return self.weight.device, self.weight.dtype
def network_deactivate():
- if not shared.opts.lora_low_memory:
+ if not shared.opts.lora_fuse_diffusers:
return
- timer['deactivate'] = 0
t0 = time.time()
+ timer.clear()
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
@@ -462,15 +459,13 @@ def network_deactivate():
modules[component_name] = list(component.named_modules())
total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
- pbar = rp.Progress(rp.TextColumn('[cyan]Deactivate network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(),
- rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'),
- console=shared.console)
+ pbar = rp.Progress(rp.TextColumn('[cyan]Network: type=LoRA action=deactivate'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
task = pbar.add_task(description='', total=total)
else:
task = None
pbar = nullcontext()
with devices.inference_context(), pbar:
- applied = 0
+ applied_layers = []
weights_devices = []
weights_dtypes = []
for component in modules.keys():
@@ -479,36 +474,33 @@ def network_deactivate():
network_layer_name = getattr(module, 'network_layer_name', None)
if shared.state.interrupted or network_layer_name is None:
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ pbar.update(task, advance=1)
continue
- weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device) if weight is not None else None
- batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device, deactivate=True)
+ batch_updown, batch_ex_bias = network_calc_weights(module, network_layer_name)
+ if shared.opts.lora_fuse_diffusers:
+ weights_device, weights_dtype = network_apply_direct(module, batch_updown, batch_ex_bias, deactivate=True)
+ else:
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device, deactivate=True)
weights_devices.append(weights_device)
weights_dtypes.append(weights_dtype)
if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- del weight, batch_updown, batch_ex_bias
+ applied_layers.append(network_layer_name)
+ del batch_updown, batch_ex_bias
module.network_current_names = ()
if task is not None:
- pbar.update(task, advance=1,
- description=f'networks={len(loaded_networks)} modules={len(modules)} deactivate={applied}')
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} deactivate={len(applied_layers)}')
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
+ timer.deactivate = time.time() - t0
if debug and len(loaded_networks) > 0:
- shared.log.debug(
- f'Deactivate network: type=LoRA networks={len(loaded_networks)} modules={total} deactivate={applied} device={weights_devices} dtype={weights_dtypes} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ shared.log.debug(f'Deactivate network: type=LoRA networks={len(loaded_networks)} modules={total} deactivate={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
- t1 = time.time()
- timer['deactivate'] += t1 - t0
+
def network_activate():
- timer['backup'] = 0
- timer['calc'] = 0
- timer['apply'] = 0
- timer['move'] = 0
+ t0 = time.time()
+ timer.clear(complete=True)
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
@@ -520,14 +512,14 @@ def network_activate():
modules[component_name] = list(component.named_modules())
total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
- pbar = rp.Progress(rp.TextColumn('[cyan]Apply network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
+ pbar = rp.Progress(rp.TextColumn('[cyan]Network: type=LoRA action=activate'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
task = pbar.add_task(description='' , total=total)
else:
task = None
pbar = nullcontext()
with devices.inference_context(), pbar:
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
- applied = 0
+ applied_layers = []
backup_size = 0
weights_devices = []
weights_dtypes = []
@@ -536,26 +528,28 @@ def network_activate():
for _, module in modules[component]:
network_layer_name = getattr(module, 'network_layer_name', None)
current_names = getattr(module, "network_current_names", ())
- if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ if getattr(module, 'weight', None) is None or shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ pbar.update(task, advance=1)
continue
- weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device) if weight is not None else None
- backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
- batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
+ backup_size += network_backup_weights(module, network_layer_name, wanted_names)
+ batch_updown, batch_ex_bias = network_calc_weights(module, network_layer_name)
+ if shared.opts.lora_fuse_diffusers:
+ weights_device, weights_dtype = network_apply_direct(module, batch_updown, batch_ex_bias)
+ else:
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
weights_devices.append(weights_device)
weights_dtypes.append(weights_dtype)
if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- del weight, batch_updown, batch_ex_bias
+ applied_layers.append(network_layer_name)
+ del batch_updown, batch_ex_bias
module.network_current_names = wanted_names
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} backup={backup_size}')
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
+ timer.activate = time.time() - t0
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/processing.py b/modules/processing.py
index 57512850a..7ae397538 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -286,7 +286,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
t0 = time.time()
if not hasattr(p, 'skip_init'):
p.init(p.all_prompts, p.all_seeds, p.all_subseeds)
- extra_network_data = None
debug(f'Processing inner: args={vars(p)}')
for n in range(p.n_iter):
pag.apply(p)
@@ -311,9 +310,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.scripts.before_process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)
if len(p.prompts) == 0:
break
- p.prompts, extra_network_data = extra_networks.parse_prompts(p.prompts)
- if not p.disable_extra_networks:
- extra_networks.activate(p, extra_network_data)
+ p.prompts, p.network_data = extra_networks.parse_prompts(p.prompts)
+ if not shared.native:
+ extra_networks.activate(p, p.network_data)
if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner):
p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)
@@ -417,6 +416,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
timer.process.record('post')
del samples
+
+ if not shared.native:
+ extra_networks.deactivate(p, p.network_data)
+
devices.torch_gc()
if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
@@ -445,9 +448,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
from modules import ipadapter
ipadapter.unapply(shared.sd_model)
- if not p.disable_extra_networks:
- extra_networks.deactivate(p, extra_network_data)
-
if shared.opts.include_mask:
if shared.opts.mask_apply_overlay and p.overlay_images is not None and len(p.overlay_images):
p.image_mask = create_binary_mask(p.overlay_images[0])
diff --git a/modules/processing_args.py b/modules/processing_args.py
index d73762d29..93b0bf9b2 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -101,6 +101,7 @@ def task_specific_kwargs(p, model):
def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, desc:str='', **kwargs):
t0 = time.time()
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
apply_circular(p.tiling, model)
if hasattr(model, "set_progress_bar_config"):
model.set_progress_bar_config(bar_format='Progress {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m' + desc, ncols=80, colour='#327fba')
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index f3eb0bc37..0b4c7dfe1 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -67,7 +67,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
raise AssertionError('Interrupted...')
time.sleep(0.1)
if hasattr(p, "stepwise_lora") and shared.native:
- extra_networks.activate(p, p.extra_network_data, step=step)
+ extra_networks.activate(p, step=step)
if latents is None:
return kwargs
elif shared.opts.nan_skip:
diff --git a/modules/processing_class.py b/modules/processing_class.py
index 21e86c1b0..2cbc07cc2 100644
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@@ -139,6 +139,7 @@ def __init__(self,
self.negative_pooleds = []
self.disable_extra_networks = False
self.iteration = 0
+ self.network_data = {}
# initializers
self.prompt = prompt
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 0341cac4d..d22a9de97 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -4,7 +4,7 @@
import numpy as np
import torch
import torchvision.transforms.functional as TF
-from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, hidiffusion, timer, modelstats
+from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, hidiffusion, timer, modelstats, extra_networks
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
@@ -89,6 +89,7 @@ def process_base(p: processing.StableDiffusionProcessing):
sd_models.move_model(shared.sd_model.unet, devices.device)
if hasattr(shared.sd_model, 'transformer'):
sd_models.move_model(shared.sd_model.transformer, devices.device)
+ extra_networks.activate(p)
hidiffusion.apply(p, shared.sd_model_type)
# if 'image' in base_args:
# base_args['image'] = set_latents(p)
@@ -223,11 +224,14 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
shared.state.job = 'HiRes'
shared.state.sampling_steps = hires_args.get('prior_num_inference_steps', None) or p.steps or hires_args.get('num_inference_steps', None)
try:
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
sd_models.move_model(shared.sd_model, devices.device)
if hasattr(shared.sd_model, 'unet'):
sd_models.move_model(shared.sd_model.unet, devices.device)
if hasattr(shared.sd_model, 'transformer'):
sd_models.move_model(shared.sd_model.transformer, devices.device)
+ if 'base' in p.skip:
+ extra_networks.activate(p)
sd_models_compile.check_deepcache(enable=True)
output = shared.sd_model(**hires_args) # pylint: disable=not-callable
if isinstance(output, dict):
@@ -345,6 +349,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
def process_decode(p: processing.StableDiffusionProcessing, output):
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if output is not None:
if not hasattr(output, 'images') and hasattr(output, 'frames'):
shared.log.debug(f'Generated: frames={len(output.frames[0])}')
@@ -405,8 +410,6 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
-
# sanitize init_images
if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
del p.init_images
@@ -453,13 +456,13 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
- results = process_decode(p, output)
+ extra_networks.deactivate(p)
+ timer.process.add('lora', networks.timer.total)
+ results = process_decode(p, output)
timer.process.record('decode')
- timer.process.add('lora', networks.total_time())
shared.sd_model = orig_pipeline
-
shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if p.state == '':
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 06c0b6012..8c140e0d6 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -92,7 +92,7 @@ def flatten(xss):
return [x for xs in xss for x in xs]
# unpack EN data in case of TE LoRA
- en_data = p.extra_network_data
+ en_data = p.network_data
en_data = [idx.items for item in en_data.values() for idx in item]
effective_batch = 1 if self.allsame else self.batchsize
key = str([self.prompts, self.negative_prompts, effective_batch, self.clip_skip, self.steps, en_data])
diff --git a/modules/shared.py b/modules/shared.py
index f8a989270..6e8f2f3fd 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -905,15 +905,12 @@ def get_default_modes():
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
"lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
"lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info"),
- "lora_fuse_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA fuse directly to model"),
- "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "LoRA load directly to GPU"),
- "lora_offload_backup": OptionInfo(True, "LoRA offload backup weights"),
+ "lora_fuse_diffusers": OptionInfo(True, "LoRA fuse directly to model"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA force loading of all models using Diffusers"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA force loading of specific models using Diffusers"),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
- "lora_low_memory": OptionInfo(False, "LoRA low memory mode"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/wiki b/wiki
index c5d484397..20c9fe52f 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit c5d484397f7504fdea098d5e24c843a69c9fd2a2
+Subproject commit 20c9fe52f253c23e736227787ddebd4cbfcbfe68
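Much of this refactor hinges on lora_fuse_diffusers becoming the default: network_apply_direct rewrites the live weight with the computed delta instead of keeping a weights_backup tensor, and deactivation reuses the same path with the delta negated (updown *= -1). A minimal sketch of that idea on a bare torch Linear, with hypothetical names; the real functions also handle bnb 4-bit weights, inpainting channel padding and bias deltas:

import torch

def fuse_delta(module: torch.nn.Linear, updown: torch.Tensor, deactivate: bool = False):
    # Fold the LoRA delta into the weight in place -- no backup tensor is kept.
    if deactivate:
        updown = -updown  # undoing is just applying the negated delta
    new_weight = module.weight.detach() + updown.to(module.weight.device)
    module.weight = torch.nn.Parameter(new_weight, requires_grad=False)

linear = torch.nn.Linear(4, 4)
delta = 0.1 * torch.ones(4, 4)
fuse_delta(linear, delta)                   # activate: W += delta
fuse_delta(linear, delta, deactivate=True)  # deactivate: W -= delta, back to the original weights (up to float rounding)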
From 6ef10195186ac1384b7d9d977df6a068a5949be3 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 13:46:25 -0500
Subject: [PATCH 092/162] update hotkeys
Signed-off-by: Vladimir Mandic
---
javascript/script.js | 5 +++--
wiki | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/javascript/script.js b/javascript/script.js
index 250e90ba2..836d9b102 100644
--- a/javascript/script.js
+++ b/javascript/script.js
@@ -125,11 +125,12 @@ document.addEventListener('keydown', (e) => {
let elem;
if (e.key === 'Escape') elem = getUICurrentTabContent().querySelector('button[id$=_interrupt]');
if (e.key === 'Enter' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_generate]');
- if (e.key === 'Backspace' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
+ if (e.key === 'r' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
if (e.key === ' ' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
+ if (e.key === 'n' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
if (e.key === 's' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=save_]');
if (e.key === 'Insert' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=save_]');
- if (e.key === 'Delete' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=delete_]');
+ if (e.key === 'd' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=delete_]');
// if (e.key === 'm' && e.ctrlKey) elem = gradioApp().getElementById('setting_sd_model_checkpoint');
if (elem) {
e.preventDefault();
diff --git a/wiki b/wiki
index 20c9fe52f..8960da514 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 20c9fe52f253c23e736227787ddebd4cbfcbfe68
+Subproject commit 8960da514e9aff4a5d47402925c9498536443379
From 7c88bfb60a6b353f0a86b5bf1fc9f40d33d6974a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 14:16:26 -0500
Subject: [PATCH 093/162] fix preview choice
Signed-off-by: Vladimir Mandic
---
modules/sd_samplers_common.py | 6 ++----
modules/shared_state.py | 1 -
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index f6f6c18d5..a96795a25 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -35,7 +35,6 @@ def setup_img2img_steps(p, steps=None):
def single_sample_to_image(sample, approximation=None):
with queue_lock:
t0 = time.time()
- sd_cascade = False
if approximation is None:
approximation = approximation_indexes.get(shared.opts.show_progress_type, None)
if approximation is None:
@@ -50,10 +49,9 @@ def single_sample_to_image(sample, approximation=None):
if len(sample.shape) > 4: # likely unknown video latent (e.g. svd)
return Image.new(mode="RGB", size=(512, 512))
- if len(sample) == 16: # sd_cascade
- sd_cascade = True
if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent
sample = sample.permute(1, 0, 2, 3)[0]
+ # TODO remove
if shared.native: # [-x,x] to [-5,5]
sample_max = torch.max(sample)
if sample_max > 5:
@@ -65,7 +63,7 @@ def single_sample_to_image(sample, approximation=None):
if approximation == 2: # TAESD
x_sample = sd_vae_taesd.decode(sample)
x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range
- elif sd_cascade and approximation != 3:
+ elif shared.sd_model_type == 'sc' and approximation != 3:
x_sample = sd_vae_stablecascade.decode(sample)
elif approximation == 0: # Simple
x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 51d33f9ed..3d3cb1ae6 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -141,7 +141,6 @@ def set_current_image(self):
if self.job == 'VAE': # avoid generating preview while vae is running
return
from modules.shared import opts, cmd_opts
- """sets self.current_image from self.current_latent if enough sampling steps have been made after the last call to this"""
if cmd_opts.lowvram or self.api:
return
if abs(self.sampling_step - self.current_image_sampling_step) >= opts.show_progress_every_n_steps and opts.live_previews_enable and opts.show_progress_every_n_steps > 0:
From 383d7052ac135db857f2ab21e82f50437148ab07 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 15:23:22 -0500
Subject: [PATCH 094/162] lora split te apply
Signed-off-by: Vladimir Mandic
---
modules/extra_networks.py | 45 ++++++++++++++++-------------
modules/lora/extra_networks_lora.py | 6 ++--
modules/lora/networks.py | 19 +++++++-----
modules/processing_args.py | 3 +-
modules/processing_diffusers.py | 2 +-
5 files changed, 43 insertions(+), 32 deletions(-)
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index e96d2e5b7..fe141cca1 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -1,6 +1,7 @@
import re
+import inspect
from collections import defaultdict
-from modules import errors, shared, devices
+from modules import errors, shared
extra_network_registry = {}
@@ -74,7 +75,7 @@ def is_stepwise(en_obj):
return any([len(str(x).split("@")) > 1 for x in all_args]) # noqa C419 # pylint: disable=use-a-generator
-def activate(p, extra_network_data=None, step=0):
+def activate(p, extra_network_data=None, step=0, include=[], exclude=[]):
"""call activate for extra networks in extra_network_data in specified order, then call activate for all remaining registered networks with an empty argument list"""
if p.disable_extra_networks:
return
@@ -89,25 +90,29 @@ def activate(p, extra_network_data=None, step=0):
shared.log.warning("Composable LoRA not compatible with 'lora_force_diffusers'")
stepwise = False
shared.opts.data['lora_functional'] = stepwise or functional
- with devices.autocast():
- for extra_network_name, extra_network_args in extra_network_data.items():
- extra_network = extra_network_registry.get(extra_network_name, None)
- if extra_network is None:
- errors.log.warning(f"Skipping unknown extra network: {extra_network_name}")
- continue
- try:
+
+ for extra_network_name, extra_network_args in extra_network_data.items():
+ extra_network = extra_network_registry.get(extra_network_name, None)
+ if extra_network is None:
+ errors.log.warning(f"Skipping unknown extra network: {extra_network_name}")
+ continue
+ try:
+ signature = list(inspect.signature(extra_network.activate).parameters)
+ if 'include' in signature and 'exclude' in signature:
+ extra_network.activate(p, extra_network_args, step=step, include=include, exclude=exclude)
+ else:
extra_network.activate(p, extra_network_args, step=step)
- except Exception as e:
- errors.display(e, f"Activating network: type={extra_network_name} args:{extra_network_args}")
-
- for extra_network_name, extra_network in extra_network_registry.items():
- args = extra_network_data.get(extra_network_name, None)
- if args is not None:
- continue
- try:
- extra_network.activate(p, [])
- except Exception as e:
- errors.display(e, f"Activating network: type={extra_network_name}")
+ except Exception as e:
+ errors.display(e, f"Activating network: type={extra_network_name} args:{extra_network_args}")
+
+ for extra_network_name, extra_network in extra_network_registry.items():
+ args = extra_network_data.get(extra_network_name, None)
+ if args is not None:
+ continue
+ try:
+ extra_network.activate(p, [])
+ except Exception as e:
+ errors.display(e, f"Activating network: type={extra_network_name}")
p.network_data = extra_network_data
if stepwise:
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 4ce7a94a9..135df1ccb 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -112,7 +112,7 @@ def __init__(self):
self.model = None
self.errors = {}
- def activate(self, p, params_list, step=0):
+ def activate(self, p, params_list, step=0, include=[], exclude=[]):
self.errors.clear()
if self.active:
if self.model != shared.opts.sd_model_checkpoint: # reset if model changed
@@ -123,8 +123,8 @@ def activate(self, p, params_list, step=0):
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
- networks.network_activate()
- if len(networks.loaded_networks) > 0 and step == 0:
+ networks.network_activate(include, exclude)
+ if len(networks.loaded_networks) > 0 and len(networks.applied_layers) > 0 and step == 0:
infotext(p)
prompt(p)
shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 805b24b52..edd82f3e4 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -19,6 +19,7 @@
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
+applied_layers: list[str] = []
bnb = None
lora_cache = {}
diffuser_loaded = []
@@ -465,7 +466,7 @@ def network_deactivate():
task = None
pbar = nullcontext()
with devices.inference_context(), pbar:
- applied_layers = []
+ applied_layers.clear()
weights_devices = []
weights_dtypes = []
for component in modules.keys():
@@ -498,7 +499,7 @@ def network_deactivate():
sd_models.set_diffuser_offload(sd_model, op="model")
-def network_activate():
+def network_activate(include=[], exclude=[]):
t0 = time.time()
timer.clear(complete=True)
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
@@ -506,10 +507,12 @@ def network_activate():
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
modules = {}
- for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
- component = getattr(sd_model, component_name, None)
+ components = include if len(include) > 0 else ['text_encoder', 'text_encoder_2', 'text_encoder_3', 'unet', 'transformer']
+ components = [x for x in components if x not in exclude]
+ for name in components:
+ component = getattr(sd_model, name, None)
if component is not None and hasattr(component, 'named_modules'):
- modules[component_name] = list(component.named_modules())
+ modules[name] = list(component.named_modules())
total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
pbar = rp.Progress(rp.TextColumn('[cyan]Network: type=LoRA action=activate'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
@@ -519,7 +522,7 @@ def network_activate():
pbar = nullcontext()
with devices.inference_context(), pbar:
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
- applied_layers = []
+ applied_layers.clear()
backup_size = 0
weights_devices = []
weights_dtypes = []
@@ -546,10 +549,12 @@ def network_activate():
module.network_current_names = wanted_names
if task is not None:
pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} backup={backup_size}')
+ if task is not None and len(applied_layers) == 0:
+ pbar.remove_task(task) # hide progress bar for no action
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
timer.activate = time.time() - t0
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} components={components} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/processing_args.py b/modules/processing_args.py
index 93b0bf9b2..e7f53ba8e 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -6,7 +6,7 @@
import inspect
import torch
import numpy as np
-from modules import shared, errors, sd_models, processing, processing_vae, processing_helpers, sd_hijack_hypertile, prompt_parser_diffusers, timer
+from modules import shared, errors, sd_models, processing, processing_vae, processing_helpers, sd_hijack_hypertile, prompt_parser_diffusers, timer, extra_networks
from modules.processing_callbacks import diffusers_callback_legacy, diffusers_callback, set_callbacks_p
from modules.processing_helpers import resize_hires, fix_prompts, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, get_generator, set_latents, apply_circular # pylint: disable=unused-import
from modules.api import helpers
@@ -134,6 +134,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
else:
prompt_parser_diffusers.embedder = None
+ extra_networks.activate(p, include=['text_encoder', 'text_encoder_2', 'text_encoder_3'])
if 'prompt' in possible:
if 'OmniGen' in model.__class__.__name__:
prompts = [p.replace('|image|', '<|image_1|>') for p in prompts]
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index d22a9de97..627eb281f 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -89,7 +89,7 @@ def process_base(p: processing.StableDiffusionProcessing):
sd_models.move_model(shared.sd_model.unet, devices.device)
if hasattr(shared.sd_model, 'transformer'):
sd_models.move_model(shared.sd_model.transformer, devices.device)
- extra_networks.activate(p)
+ extra_networks.activate(p, exclude=['text_encoder', 'text_encoder_2'])
hidiffusion.apply(p, shared.sd_model_type)
# if 'image' in base_args:
# base_args['image'] = set_latents(p)
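The compatibility shim in extra_networks.activate() above inspects each handler's activate() signature before forwarding include/exclude, so handlers that predate the split text-encoder/unet activation keep working unchanged. A minimal sketch of that dispatch with two hypothetical handler classes (names are illustrative only):

import inspect

class LegacyHandler:
    def activate(self, p, params_list, step=0):
        return f'legacy step={step}'

class SplitHandler:
    def activate(self, p, params_list, step=0, include=[], exclude=[]):
        return f'split include={include} exclude={exclude}'

def call_activate(handler, p, args, step=0, include=[], exclude=[]):
    # Forward include/exclude only when the handler's signature accepts them.
    signature = list(inspect.signature(handler.activate).parameters)
    if 'include' in signature and 'exclude' in signature:
        return handler.activate(p, args, step=step, include=include, exclude=exclude)
    return handler.activate(p, args, step=step)

print(call_activate(LegacyHandler(), None, [], include=['text_encoder']))  # include is silently dropped
print(call_activate(SplitHandler(), None, [], include=['text_encoder']))   # include is forwarded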
From 0f458853fdd9841dd8aa346172df271ee6358e5a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 08:41:38 -0500
Subject: [PATCH 095/162] fix sd upscale
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++-
scripts/sd_upscale.py | 5 +++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3aaec73f..6775ef37e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-06
+## Update for 2024-12-10
### New models and integrations
@@ -96,6 +96,7 @@
- simplify img2img/inpaint/sketch canvas handling
- fix prompt caching
- fix xyz grid skip final pass
+- fix sd upscale script
## Update for 2024-11-21
diff --git a/scripts/sd_upscale.py b/scripts/sd_upscale.py
index 9c5a72204..7ac31b603 100644
--- a/scripts/sd_upscale.py
+++ b/scripts/sd_upscale.py
@@ -48,7 +48,7 @@ def run(self, p, _, overlap, upscaler_index, scale_factor): # pylint: disable=ar
else:
img = init_img
devices.torch_gc()
- grid = images.split_grid(img, tile_w=p.width, tile_h=p.height, overlap=overlap)
+ grid = images.split_grid(img, tile_w=init_img.width, tile_h=init_img.height, overlap=overlap)
batch_size = p.batch_size
upscale_count = p.n_iter
p.n_iter = 1
@@ -61,7 +61,7 @@ def run(self, p, _, overlap, upscaler_index, scale_factor): # pylint: disable=ar
batch_count = math.ceil(len(work) / batch_size)
state.job_count = batch_count * upscale_count
- log.info(f"SD upscale: images={len(work)} tile={len(grid.tiles[0][2])}x{len(grid.tiles)} batches={state.job_count}")
+ log.info(f"SD upscale: images={len(work)} tiles={len(grid.tiles)} batches={state.job_count}")
result_images = []
for n in range(upscale_count):
@@ -91,4 +91,5 @@ def run(self, p, _, overlap, upscaler_index, scale_factor): # pylint: disable=ar
images.save_image(combined_image, p.outpath_samples, "", start_seed, p.prompt, opts.samples_format, info=initial_info, p=p)
processed = Processed(p, result_images, seed, initial_info)
+ log.info(f"SD upscale: images={result_images}")
return processed
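
For reference, the number of tiles produced by the fixed split_grid call follows from the upscaled image size, the tile size and the overlap; the helper below is a hedged sketch of that arithmetic only, not the images.split_grid implementation:

```python
import math

def tiles_along_axis(image_size: int, tile_size: int, overlap: int) -> int:
    """How many overlapping tiles of tile_size are needed to cover image_size pixels."""
    if image_size <= tile_size:
        return 1
    stride = tile_size - overlap
    return math.ceil((image_size - overlap) / stride)

# e.g. a 512x512 init image upscaled 4x and tiled with the init image dimensions
cols = tiles_along_axis(2048, 512, 64)  # 5
rows = tiles_along_axis(2048, 512, 64)  # 5
print(cols * rows)                       # 25 tiles per upscale pass
```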
From 042178fedbe8d83d6f7bb03ac13bff43008d17cf Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 10:20:00 -0500
Subject: [PATCH 096/162] reorg settings
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
javascript/script.js | 2 +-
javascript/sdnext.css | 6 +-
modules/processing.py | 3 +
modules/processing_class.py | 3 +
modules/shared.py | 255 +++++++++++++++++++-----------------
scripts/cogvideo.py | 31 ++---
7 files changed, 161 insertions(+), 140 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6775ef37e..5412861a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,6 +67,7 @@
- control: hide preview column by default
- control: option to hide input column
- control: add stats
+ - settings: reorganized and simplified
- browser -> server logging framework
- add additional themes: `black-reimagined`, thanks @Artheriax
diff --git a/javascript/script.js b/javascript/script.js
index 836d9b102..f943f4626 100644
--- a/javascript/script.js
+++ b/javascript/script.js
@@ -125,7 +125,7 @@ document.addEventListener('keydown', (e) => {
let elem;
if (e.key === 'Escape') elem = getUICurrentTabContent().querySelector('button[id$=_interrupt]');
if (e.key === 'Enter' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_generate]');
- if (e.key === 'r' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
+ if (e.key === 'i' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
if (e.key === ' ' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
if (e.key === 'n' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
if (e.key === 's' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=save_]');
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index c5145c973..60d835cd4 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -149,18 +149,20 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
#settings>div.tab-content { flex: 10 0 75%; display: grid; }
#settings>div.tab-content>div { border: none; padding: 0; }
#settings>div.tab-content>div>div>div>div>div { flex-direction: unset; }
-#settings>div.tab-nav { display: grid; grid-template-columns: repeat(auto-fill, .5em minmax(10em, 1fr)); flex: 1 0 auto; width: 12em; align-self: flex-start; gap: var(--spacing-xxl); }
+#settings>div.tab-nav { display: grid; grid-template-columns: repeat(auto-fill, .5em minmax(10em, 1fr)); flex: 1 0 auto; width: 12em; align-self: flex-start; gap: 8px; }
#settings>div.tab-nav button { display: block; border: none; text-align: left; white-space: initial; padding: 0; }
#settings>div.tab-nav>#settings_show_all_pages { padding: var(--size-2) var(--size-4); }
#settings .block.gradio-checkbox { margin: 0; width: auto; }
#settings .dirtyable { gap: .5em; }
#settings .dirtyable.hidden { display: none; }
-#settings .modification-indicator { height: 1.2em; border-radius: 1em !important; padding: 0; width: 0; margin-right: 0.5em; }
+#settings .modification-indicator { height: 1.2em; border-radius: 1em !important; padding: 0; width: 0; margin-right: 0.5em; border-left: inset; }
#settings .modification-indicator:disabled { visibility: hidden; }
#settings .modification-indicator.saved { background: var(--color-accent-soft); width: var(--spacing-sm); }
#settings .modification-indicator.changed { background: var(--color-accent); width: var(--spacing-sm); }
#settings .modification-indicator.changed.unsaved { background-image: linear-gradient(var(--color-accent) 25%, var(--color-accent-soft) 75%); width: var(--spacing-sm); }
#settings_result { margin: 0 1.2em; }
+#tab_settings .gradio-slider, #tab_settings .gradio-dropdown { width: 300px !important; max-width: 300px; }
+#tab_settings textarea { max-width: 500px; }
.licenses { display: block !important; }
/* live preview */
diff --git a/modules/processing.py b/modules/processing.py
index 7ae397538..b4839e402 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -118,6 +118,9 @@ def js(self):
def infotext(self, p: StableDiffusionProcessing, index):
return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size)
+ def __str__(self):
+ return f'{self.__class__.__name__}: {self.__dict__}'
+
def process_images(p: StableDiffusionProcessing) -> Processed:
timer.process.reset()
diff --git a/modules/processing_class.py b/modules/processing_class.py
index 2cbc07cc2..7a7d9cd36 100644
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@@ -339,6 +339,9 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs
def close(self):
self.sampler = None # pylint: disable=attribute-defined-outside-init
+ def __str__(self):
+ return f'{self.__class__.__name__}: {self.__dict__}'
+
class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
def __init__(self, **kwargs):
diff --git a/modules/shared.py b/modules/shared.py
index 6e8f2f3fd..3d7571029 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -467,69 +467,103 @@ def get_default_modes():
startup_offload_mode, startup_cross_attention, startup_sdp_options = get_default_modes()
-options_templates.update(options_section(('sd', "Execution & Models"), {
+options_templates.update(options_section(('sd', "Models & Loading"), {
"sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["diffusers", "original"] }),
+ "diffusers_pipeline": OptionInfo('Autodetect', 'Model pipeline', gr.Dropdown, lambda: {"choices": list(shared_items.get_pipelines()), "visible": native}),
"sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", DropdownEditable, lambda: {"choices": list_checkpoint_titles()}, refresh=refresh_checkpoints),
"sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_titles()}, refresh=refresh_checkpoints),
- "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list),
"sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list),
- "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": shared_items.sd_te_items()}, refresh=shared_items.refresh_te_list),
- "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_titles()}, refresh=refresh_checkpoints),
+ "latent_history": OptionInfo(16, "Latent history size", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}),
+
+ "offload_sep": OptionInfo("Model Offloading
", "", gr.HTML),
+ "diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner", gr.Checkbox, {"visible": False }),
+ "diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE", gr.Checkbox, {"visible": False }),
+ "diffusers_move_refiner": OptionInfo(False, "Move refiner model to CPU when not in use", gr.Checkbox, {"visible": False }),
+ "diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible", gr.Checkbox, {"visible": False }),
+ "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
+ "diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+
+ "advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_checkpoint_autodownload": OptionInfo(True, "Model auto-download on demand"),
- "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results", gr.Checkbox, {"visible": False}),
- "sd_textencoder_cache_size": OptionInfo(4, "Text encoder cache size", gr.Slider, {"minimum": 0, "maximum": 16, "step": 1}),
"stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),
+ "diffusers_eval": OptionInfo(True, "Force model eval", gr.Checkbox, {"visible": False }),
+ "diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"),
+ "disable_accelerate": OptionInfo(False, "Disable accelerate", gr.Checkbox, {"visible": False }),
+ "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_titles()}, refresh=refresh_checkpoints),
+ "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }),
+}))
+
+options_templates.update(options_section(('vae_encoder', "Variable Auto Encoder"), {
+ "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list),
+ "diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
+ "no_half_vae": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision (--no-half-vae)"),
+ "diffusers_vae_slicing": OptionInfo(True, "VAE slicing", gr.Checkbox, {"visible": native}),
+ "diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling", gr.Checkbox, {"visible": native}),
+ "sd_vae_sliced_encode": OptionInfo(False, "VAE sliced encode", gr.Checkbox, {"visible": not native}),
+ "nan_skip": OptionInfo(False, "Skip Generation if NaN found in latents", gr.Checkbox),
+ "rollback_vae": OptionInfo(False, "Attempt VAE roll back for NaN values"),
+}))
+
+options_templates.update(options_section(('text_encoder', "Text Encoder"), {
+ "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": shared_items.sd_te_items()}, refresh=shared_items.refresh_te_list),
+ "prompt_attention": OptionInfo("native", "Prompt attention parser", gr.Radio, {"choices": ["native", "compel", "xhinker", "a1111", "fixed"] }),
"prompt_mean_norm": OptionInfo(False, "Prompt attention normalization", gr.Checkbox),
+ "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results", gr.Checkbox, {"visible": False}),
+ "sd_textencoder_cache_size": OptionInfo(4, "Text encoder cache size", gr.Slider, {"minimum": 0, "maximum": 16, "step": 1}),
"comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": not native }),
- "prompt_attention": OptionInfo("native", "Prompt attention parser", gr.Radio, {"choices": ["native", "compel", "xhinker", "a1111", "fixed"] }),
- "latent_history": OptionInfo(16, "Latent history size", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}),
- "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }),
+ "diffusers_zeros_prompt_pad": OptionInfo(False, "Use zeros for prompt padding", gr.Checkbox),
+ "diffusers_pooled": OptionInfo("default", "Diffusers SDXL pooled embeds", gr.Radio, {"choices": ['default', 'weighted']}),
}))
options_templates.update(options_section(('cuda', "Compute Settings"), {
- "math_sep": OptionInfo("Execution precision
", "", gr.HTML),
+ "math_sep": OptionInfo("Execution Precision
", "", gr.HTML),
"precision": OptionInfo("Autocast", "Precision type", gr.Radio, {"choices": ["Autocast", "Full"]}),
"cuda_dtype": OptionInfo("Auto", "Device precision type", gr.Radio, {"choices": ["Auto", "FP32", "FP16", "BF16"]}),
+ "no_half": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision (--no-half)", None, None, None),
+ "upcast_sampling": OptionInfo(False if sys.platform != "darwin" else True, "Upcast sampling", gr.Checkbox, {"visible": not native}),
+ "upcast_attn": OptionInfo(False, "Upcast attention layer", gr.Checkbox, {"visible": not native}),
+ "cuda_cast_unet": OptionInfo(False, "Fixed UNet precision", gr.Checkbox, {"visible": not native}),
- "model_sep": OptionInfo("Model options
", "", gr.HTML),
- "no_half": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision for model (--no-half)", None, None, None),
- "no_half_vae": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision for VAE (--no-half-vae)"),
- "upcast_sampling": OptionInfo(False if sys.platform != "darwin" else True, "Upcast sampling"),
- "upcast_attn": OptionInfo(False, "Upcast attention layer"),
- "cuda_cast_unet": OptionInfo(False, "Fixed UNet precision"),
- "nan_skip": OptionInfo(False, "Skip Generation if NaN found in latents", gr.Checkbox),
- "rollback_vae": OptionInfo(False, "Attempt VAE roll back for NaN values"),
+ "generator_sep": OptionInfo("Noise Options
", "", gr.HTML),
+ "diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"cross_attention_sep": OptionInfo("Cross Attention
", "", gr.HTML),
- "cross_attention_optimization": OptionInfo(startup_cross_attention, "Attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention(native) }),
- "sdp_options": OptionInfo(startup_sdp_options, "SDP options", gr.CheckboxGroup, {"choices": ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention', 'Sage attention'] }),
+ "cross_attention_optimization": OptionInfo(startup_cross_attention, "Attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention(native)}),
+ "sdp_options": OptionInfo(startup_sdp_options, "SDP options", gr.CheckboxGroup, {"choices": ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention', 'Sage attention'], "visible": native}),
"xformers_options": OptionInfo(['Flash attention'], "xFormers options", gr.CheckboxGroup, {"choices": ['Flash attention'] }),
"dynamic_attention_slice_rate": OptionInfo(4, "Dynamic Attention slicing rate in GB", gr.Slider, {"minimum": 0.1, "maximum": gpu_memory, "step": 0.1, "visible": native}),
"sub_quad_sep": OptionInfo("Sub-quadratic options
", "", gr.HTML, {"visible": not native}),
"sub_quad_q_chunk_size": OptionInfo(512, "Attention query chunk size", gr.Slider, {"minimum": 16, "maximum": 8192, "step": 8, "visible": not native}),
"sub_quad_kv_chunk_size": OptionInfo(512, "Attention kv chunk size", gr.Slider, {"minimum": 0, "maximum": 8192, "step": 8, "visible": not native}),
"sub_quad_chunk_threshold": OptionInfo(80, "Attention chunking threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1, "visible": not native}),
+}))
- "other_sep": OptionInfo("Execution options
", "", gr.HTML),
- "opt_channelslast": OptionInfo(False, "Use channels last "),
- "cudnn_deterministic": OptionInfo(False, "Use deterministic mode"),
- "cudnn_benchmark": OptionInfo(False, "Full-depth cuDNN benchmark feature"),
+options_templates.update(options_section(('backends', "Backend Settings"), {
+ "other_sep": OptionInfo("Torch Options
", "", gr.HTML),
+ "opt_channelslast": OptionInfo(False, "Channels last "),
+ "cudnn_deterministic": OptionInfo(False, "Deterministic mode"),
+ "cudnn_benchmark": OptionInfo(False, "Full-depth cuDNN benchmark"),
"diffusers_fuse_projections": OptionInfo(False, "Fused projections"),
- "torch_expandable_segments": OptionInfo(False, "Torch expandable segments"),
- "cuda_mem_fraction": OptionInfo(0.0, "Torch memory limit", gr.Slider, {"minimum": 0, "maximum": 2.0, "step": 0.05}),
- "torch_gc_threshold": OptionInfo(80, "Torch memory threshold for GC", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
- "torch_malloc": OptionInfo("native", "Torch memory allocator", gr.Radio, {"choices": ['native', 'cudaMallocAsync'] }),
+ "torch_expandable_segments": OptionInfo(False, "Expandable segments"),
+ "cuda_mem_fraction": OptionInfo(0.0, "Memory limit", gr.Slider, {"minimum": 0, "maximum": 2.0, "step": 0.05}),
+ "torch_gc_threshold": OptionInfo(80, "GC threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
+ "inference_mode": OptionInfo("no-grad", "Inference mode", gr.Radio, {"choices": ["no-grad", "inference-mode", "none"]}),
+ "torch_malloc": OptionInfo("native", "Memory allocator", gr.Radio, {"choices": ['native', 'cudaMallocAsync'] }),
+
+ "onnx_sep": OptionInfo("ONNX
", "", gr.HTML),
+ "onnx_execution_provider": OptionInfo(execution_providers.get_default_execution_provider().value, 'ONNX Execution Provider', gr.Dropdown, lambda: {"choices": execution_providers.available_execution_providers }),
+ "onnx_cpu_fallback": OptionInfo(True, 'ONNX allow fallback to CPU'),
+ "onnx_cache_converted": OptionInfo(True, 'ONNX cache converted models'),
+ "onnx_unload_base": OptionInfo(False, 'ONNX unload base model when processing refiner'),
- "cuda_compile_sep": OptionInfo("Model Compile
", "", gr.HTML),
- "cuda_compile": OptionInfo([] if not cmd_opts.use_openvino else ["Model", "VAE"], "Compile Model", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"]}),
- "cuda_compile_backend": OptionInfo("none" if not cmd_opts.use_openvino else "openvino_fx", "Model compile backend", gr.Radio, {"choices": ['none', 'inductor', 'cudagraphs', 'aot_ts_nvfuser', 'hidet', 'migraphx', 'ipex', 'onediff', 'stable-fast', 'deep-cache', 'olive-ai', 'openvino_fx']}),
- "cuda_compile_mode": OptionInfo("default", "Model compile mode", gr.Radio, {"choices": ['default', 'reduce-overhead', 'max-autotune', 'max-autotune-no-cudagraphs']}),
- "cuda_compile_fullgraph": OptionInfo(True if not cmd_opts.use_openvino else False, "Model compile fullgraph"),
- "cuda_compile_precompile": OptionInfo(False, "Model compile precompile"),
- "cuda_compile_verbose": OptionInfo(False, "Model compile verbose mode"),
- "cuda_compile_errors": OptionInfo(True, "Model compile suppress errors"),
- "deep_cache_interval": OptionInfo(3, "DeepCache cache interval", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "olive_sep": OptionInfo("Olive
", "", gr.HTML),
+ "olive_float16": OptionInfo(True, 'Olive use FP16 on optimization'),
+ "olive_vae_encoder_float32": OptionInfo(False, 'Olive force FP32 for VAE Encoder'),
+ "olive_static_dims": OptionInfo(True, 'Olive use static dimensions'),
+ "olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
"ipex_sep": OptionInfo("IPEX
", "", gr.HTML, {"visible": devices.backend == "ipex"}),
"ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}),
@@ -543,91 +577,55 @@ def get_default_modes():
"directml_sep": OptionInfo("DirectML
", "", gr.HTML, {"visible": devices.backend == "directml"}),
"directml_memory_provider": OptionInfo(default_memory_provider, 'DirectML memory stats provider', gr.Radio, {"choices": memory_providers, "visible": devices.backend == "directml"}),
"directml_catch_nan": OptionInfo(False, "DirectML retry ops for NaN", gr.Checkbox, {"visible": devices.backend == "directml"}),
-
- "olive_sep": OptionInfo("Olive
", "", gr.HTML),
- "olive_float16": OptionInfo(True, 'Olive use FP16 on optimization'),
- "olive_vae_encoder_float32": OptionInfo(False, 'Olive force FP32 for VAE Encoder'),
- "olive_static_dims": OptionInfo(True, 'Olive use static dimensions'),
- "olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
-}))
-
-options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
- "diffusers_pipeline": OptionInfo('Autodetect', 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": list(shared_items.get_pipelines()) }),
- "diffuser_cache_config": OptionInfo(True, "Use cached model config when available"),
- "diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
- "diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE"),
- "diffusers_move_refiner": OptionInfo(False, "Move refiner model to CPU when not in use"),
- "diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible"),
- "diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
- "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
- "diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_cpu_memory": OptionInfo(0.75, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
- "diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
- "diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling"),
- "diffusers_model_load_variant": OptionInfo("default", "Preferred Model variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
- "diffusers_vae_load_variant": OptionInfo("default", "Preferred VAE variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
- "custom_diffusers_pipeline": OptionInfo('', 'Load custom Diffusers pipeline'),
- "diffusers_eval": OptionInfo(True, "Force model eval"),
- "diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"),
- "disable_accelerate": OptionInfo(False, "Disable accelerate"),
- "diffusers_pooled": OptionInfo("default", "Diffusers SDXL pooled embeds", gr.Radio, {"choices": ['default', 'weighted']}),
- "diffusers_zeros_prompt_pad": OptionInfo(False, "Use zeros for prompt padding", gr.Checkbox),
- "huggingface_token": OptionInfo('', 'HuggingFace token'),
- "enable_linfusion": OptionInfo(False, "Apply LinFusion distillation on load"),
-
- "onnx_sep": OptionInfo("ONNX Runtime
", "", gr.HTML),
- "onnx_execution_provider": OptionInfo(execution_providers.get_default_execution_provider().value, 'Execution Provider', gr.Dropdown, lambda: {"choices": execution_providers.available_execution_providers }),
- "onnx_cpu_fallback": OptionInfo(True, 'ONNX allow fallback to CPU'),
- "onnx_cache_converted": OptionInfo(True, 'ONNX cache converted models'),
- "onnx_unload_base": OptionInfo(False, 'ONNX unload base model when processing refiner'),
}))
options_templates.update(options_section(('quantization', "Quantization Settings"), {
- "bnb_quantization": OptionInfo([], "BnB quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
- "bnb_quantization_type": OptionInfo("nf4", "BnB quantization type", gr.Radio, {"choices": ['nf4', 'fp8', 'fp4'], "visible": native}),
- "bnb_quantization_storage": OptionInfo("uint8", "BnB quantization storage", gr.Radio, {"choices": ["float16", "float32", "int8", "uint8", "float64", "bfloat16"], "visible": native}),
- "optimum_quanto_weights": OptionInfo([], "Optimum.quanto quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
- "optimum_quanto_weights_type": OptionInfo("qint8", "Optimum.quanto quantization type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}),
- "optimum_quanto_activations_type": OptionInfo("none", "Optimum.quanto quantization activations ", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}),
- "torchao_quantization": OptionInfo([], "TorchAO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
- "torchao_quantization_mode": OptionInfo("pre", "TorchAO quantization mode", gr.Radio, {"choices": ['pre', 'post'], "visible": native}),
- "torchao_quantization_type": OptionInfo("int8", "TorchAO quantization type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}),
- "nncf_compress_weights": OptionInfo([], "NNCF compression enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
- "nncf_compress_weights_mode": OptionInfo("INT8", "NNCF compress mode", gr.Radio, {"choices": ['INT8', 'INT8_SYM', 'INT4_ASYM', 'INT4_SYM', 'NF4'] if cmd_opts.use_openvino else ['INT8']}),
- "nncf_compress_weights_raito": OptionInfo(1.0, "NNCF compress ratio", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": cmd_opts.use_openvino}),
- "nncf_quantize": OptionInfo([], "NNCF OpenVINO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": cmd_opts.use_openvino}),
- "nncf_quant_mode": OptionInfo("INT8", "NNCF OpenVINO quantization mode", gr.Radio, {"choices": ['INT8', 'FP8_E4M3', 'FP8_E5M2'], "visible": cmd_opts.use_openvino}),
-
- "quant_shuffle_weights": OptionInfo(False, "Shuffle the weights between GPU and CPU when quantizing", gr.Checkbox, {"visible": native}),
+ "bnb_sep": OptionInfo("BitsAndBytes
", "", gr.HTML),
+ "bnb_quantization": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
+ "bnb_quantization_type": OptionInfo("nf4", "Type", gr.Radio, {"choices": ['nf4', 'fp8', 'fp4'], "visible": native}),
+ "bnb_quantization_storage": OptionInfo("uint8", "Backend storage", gr.Radio, {"choices": ["float16", "float32", "int8", "uint8", "float64", "bfloat16"], "visible": native}),
+ "optimum_quanto_sep": OptionInfo("Optimum Quanto
", "", gr.HTML),
+ "optimum_quanto_weights": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
+ "optimum_quanto_weights_type": OptionInfo("qint8", "Type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}),
+ "optimum_quanto_activations_type": OptionInfo("none", "Activations ", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}),
+ "torchao_sep": OptionInfo("TorchAO
", "", gr.HTML),
+ "torchao_quantization": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
+ "torchao_quantization_mode": OptionInfo("pre", "Mode", gr.Radio, {"choices": ['pre', 'post'], "visible": native}),
+ "torchao_quantization_type": OptionInfo("int8", "Type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}),
+ "nncf_sep": OptionInfo("NNCF
", "", gr.HTML),
+ "nncf_compress_weights": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
+ "nncf_compress_weights_mode": OptionInfo("INT8", "Mode", gr.Radio, {"choices": ['INT8', 'INT8_SYM', 'INT4_ASYM', 'INT4_SYM', 'NF4'] if cmd_opts.use_openvino else ['INT8']}),
+ "nncf_compress_weights_raito": OptionInfo(1.0, "Compress ratio", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": cmd_opts.use_openvino}),
+ "nncf_quantize": OptionInfo([], "OpenVINO enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": cmd_opts.use_openvino}),
+ "nncf_quant_mode": OptionInfo("INT8", "OpenVINO mode", gr.Radio, {"choices": ['INT8', 'FP8_E4M3', 'FP8_E5M2'], "visible": cmd_opts.use_openvino}),
+ "quant_shuffle_weights": OptionInfo(False, "Shuffle weights", gr.Checkbox, {"visible": native}),
}))
-options_templates.update(options_section(('advanced', "Inference Settings"), {
- "token_merging_sep": OptionInfo("Token merging
", "", gr.HTML),
+options_templates.update(options_section(('advanced', "Pipeline Modifiers"), {
+ "token_merging_sep": OptionInfo("Token Merging
", "", gr.HTML),
"token_merging_method": OptionInfo("None", "Token merging method", gr.Radio, {"choices": ['None', 'ToMe', 'ToDo']}),
"tome_ratio": OptionInfo(0.0, "ToMe token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05}),
"todo_ratio": OptionInfo(0.0, "ToDo token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05}),
"freeu_sep": OptionInfo("FreeU
", "", gr.HTML),
"freeu_enabled": OptionInfo(False, "FreeU"),
- "freeu_b1": OptionInfo(1.2, "1st stage backbone factor", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
- "freeu_b2": OptionInfo(1.4, "2nd stage backbone factor", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
- "freeu_s1": OptionInfo(0.9, "1st stage skip factor", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
- "freeu_s2": OptionInfo(0.2, "2nd stage skip factor", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
+ "freeu_b1": OptionInfo(1.2, "1st stage backbone", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
+ "freeu_b2": OptionInfo(1.4, "2nd stage backbone", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
+ "freeu_s1": OptionInfo(0.9, "1st stage skip", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
+ "freeu_s2": OptionInfo(0.2, "2nd stage skip", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"pag_sep": OptionInfo("Perturbed-Attention Guidance
", "", gr.HTML),
"pag_apply_layers": OptionInfo("m0", "PAG layer names"),
"hypertile_sep": OptionInfo("HyperTile
", "", gr.HTML),
- "hypertile_hires_only": OptionInfo(False, "HyperTile hires pass only"),
- "hypertile_unet_enabled": OptionInfo(False, "HyperTile UNet"),
- "hypertile_unet_tile": OptionInfo(0, "HyperTile UNet tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
- "hypertile_unet_swap_size": OptionInfo(1, "HyperTile UNet swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
- "hypertile_unet_depth": OptionInfo(0, "HyperTile UNet depth", gr.Slider, {"minimum": 0, "maximum": 4, "step": 1}),
- "hypertile_vae_enabled": OptionInfo(False, "HyperTile VAE", gr.Checkbox),
- "hypertile_vae_tile": OptionInfo(128, "HyperTile VAE tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
- "hypertile_vae_swap_size": OptionInfo(1, "HyperTile VAE swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "hypertile_hires_only": OptionInfo(False, "HiRes pass only"),
+ "hypertile_unet_enabled": OptionInfo(False, "UNet Enabled"),
+ "hypertile_unet_tile": OptionInfo(0, "UNet tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
+ "hypertile_unet_swap_size": OptionInfo(1, "UNet swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "hypertile_unet_depth": OptionInfo(0, "UNet depth", gr.Slider, {"minimum": 0, "maximum": 4, "step": 1}),
+ "hypertile_vae_enabled": OptionInfo(False, "VAE Enabled", gr.Checkbox),
+ "hypertile_vae_tile": OptionInfo(128, "VAE tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
+ "hypertile_vae_swap_size": OptionInfo(1, "VAE swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
"hidiffusion_sep": OptionInfo("HiDiffusion
", "", gr.HTML),
"hidiffusion_raunet": OptionInfo(True, "Apply RAU-Net"),
@@ -636,16 +634,28 @@ def get_default_modes():
"hidiffusion_t1": OptionInfo(-1, "Override T1 ratio", gr.Slider, {"minimum": -1, "maximum": 1.0, "step": 0.05}),
"hidiffusion_t2": OptionInfo(-1, "Override T2 ratio", gr.Slider, {"minimum": -1, "maximum": 1.0, "step": 0.05}),
+ "linfusion_sep": OptionInfo("Batch
", "", gr.HTML),
+ "enable_linfusion": OptionInfo(False, "Apply LinFusion distillation on load"),
+
"inference_batch_sep": OptionInfo("Batch
", "", gr.HTML),
"sequential_seed": OptionInfo(True, "Batch mode uses sequential seeds"),
"batch_frame_mode": OptionInfo(False, "Parallel process images in batch"),
- "inference_other_sep": OptionInfo("Other
", "", gr.HTML),
- "inference_mode": OptionInfo("no-grad", "Torch inference mode", gr.Radio, {"choices": ["no-grad", "inference-mode", "none"]}),
- "sd_vae_sliced_encode": OptionInfo(False, "VAE sliced encode", gr.Checkbox, {"visible": not native}),
+}))
+
+options_templates.update(options_section(('compile', "Model Compile"), {
+ "cuda_compile_sep": OptionInfo("Model Compile
", "", gr.HTML),
+ "cuda_compile": OptionInfo([] if not cmd_opts.use_openvino else ["Model", "VAE"], "Compile Model", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"]}),
+ "cuda_compile_backend": OptionInfo("none" if not cmd_opts.use_openvino else "openvino_fx", "Model compile backend", gr.Radio, {"choices": ['none', 'inductor', 'cudagraphs', 'aot_ts_nvfuser', 'hidet', 'migraphx', 'ipex', 'onediff', 'stable-fast', 'deep-cache', 'olive-ai', 'openvino_fx']}),
+ "cuda_compile_mode": OptionInfo("default", "Model compile mode", gr.Radio, {"choices": ['default', 'reduce-overhead', 'max-autotune', 'max-autotune-no-cudagraphs']}),
+ "cuda_compile_fullgraph": OptionInfo(True if not cmd_opts.use_openvino else False, "Model compile fullgraph"),
+ "cuda_compile_precompile": OptionInfo(False, "Model compile precompile"),
+ "cuda_compile_verbose": OptionInfo(False, "Model compile verbose mode"),
+ "cuda_compile_errors": OptionInfo(True, "Model compile suppress errors"),
+ "deep_cache_interval": OptionInfo(3, "DeepCache cache interval", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
}))
options_templates.update(options_section(('system-paths', "System Paths"), {
- "models_paths_sep_options": OptionInfo("Models paths
", "", gr.HTML),
+ "models_paths_sep_options": OptionInfo("Models Paths
", "", gr.HTML),
"models_dir": OptionInfo('models', "Base path where all models are stored", folder=True),
"ckpt_dir": OptionInfo(os.path.join(paths.models_path, 'Stable-diffusion'), "Folder with stable diffusion models", folder=True),
"diffusers_dir": OptionInfo(os.path.join(paths.models_path, 'Diffusers'), "Folder with Huggingface models", folder=True),
@@ -726,13 +736,13 @@ def get_default_modes():
}))
options_templates.update(options_section(('saving-paths', "Image Paths"), {
- "saving_sep_images": OptionInfo("Save options
", "", gr.HTML),
+ "saving_sep_images": OptionInfo("Save Options
", "", gr.HTML),
"save_images_add_number": OptionInfo(True, "Numbered filenames", component_args=hide_dirs),
"use_original_name_batch": OptionInfo(True, "Batch uses original name"),
"save_to_dirs": OptionInfo(False, "Save images to a subdirectory"),
"directories_filename_pattern": OptionInfo("[date]", "Directory name pattern", component_args=hide_dirs),
"samples_filename_pattern": OptionInfo("[seq]-[model_name]-[prompt_words]", "Images filename pattern", component_args=hide_dirs),
- "directories_max_prompt_words": OptionInfo(8, "Max words per pattern", gr.Slider, {"minimum": 1, "maximum": 99, "step": 1, **hide_dirs}),
+ "directories_max_prompt_words": OptionInfo(8, "Max words", gr.Slider, {"minimum": 1, "maximum": 99, "step": 1, **hide_dirs}),
"outdir_sep_dirs": OptionInfo("Folders
", "", gr.HTML),
"outdir_samples": OptionInfo("", "Images folder", component_args=hide_dirs, folder=True),
@@ -751,14 +761,14 @@ def get_default_modes():
"outdir_control_grids": OptionInfo("outputs/grids", 'Folder for control grids', component_args=hide_dirs, folder=True),
}))
-options_templates.update(options_section(('ui', "User Interface Options"), {
+options_templates.update(options_section(('ui', "User Interface"), {
"theme_type": OptionInfo("Standard", "Theme type", gr.Radio, {"choices": ["Modern", "Standard", "None"]}),
"theme_style": OptionInfo("Auto", "Theme mode", gr.Radio, {"choices": ["Auto", "Dark", "Light"]}),
"gradio_theme": OptionInfo("black-teal", "UI theme", gr.Dropdown, lambda: {"choices": theme.list_themes()}, refresh=theme.refresh_themes),
"autolaunch": OptionInfo(False, "Autolaunch browser upon startup"),
"font_size": OptionInfo(14, "Font size", gr.Slider, {"minimum": 8, "maximum": 32, "step": 1, "visible": True}),
"aspect_ratios": OptionInfo("1:1, 4:3, 3:2, 16:9, 16:10, 21:9, 2:3, 3:4, 9:16, 10:16, 9:21", "Allowed aspect ratios"),
- "motd": OptionInfo(True, "Show MOTD"),
+ "motd": OptionInfo(False, "Show MOTD"),
"compact_view": OptionInfo(False, "Compact view"),
"return_grid": OptionInfo(True, "Show grid in results"),
"return_mask": OptionInfo(False, "Inpainting include greyscale mask in results"),
@@ -770,14 +780,14 @@ def get_default_modes():
}))
options_templates.update(options_section(('live-preview', "Live Previews"), {
- "notification_audio_enable": OptionInfo(False, "Play a notification upon completion"),
- "notification_audio_path": OptionInfo("html/notification.mp3","Path to notification sound", component_args=hide_dirs, folder=True),
"show_progress_every_n_steps": OptionInfo(1, "Live preview display period", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}),
"show_progress_type": OptionInfo("Approximate", "Live preview method", gr.Radio, {"choices": ["Simple", "Approximate", "TAESD", "Full VAE"]}),
"live_preview_refresh_period": OptionInfo(500, "Progress update period", gr.Slider, {"minimum": 0, "maximum": 5000, "step": 25}),
"live_preview_taesd_layers": OptionInfo(3, "TAESD decode layers", gr.Slider, {"minimum": 1, "maximum": 3, "step": 1}),
"logmonitor_show": OptionInfo(True, "Show log view"),
"logmonitor_refresh_period": OptionInfo(5000, "Log view update period", gr.Slider, {"minimum": 0, "maximum": 30000, "step": 25}),
+ "notification_audio_enable": OptionInfo(False, "Play a notification upon completion"),
+ "notification_audio_path": OptionInfo("html/notification.mp3","Path to notification sound", component_args=hide_dirs, folder=True),
}))
options_templates.update(options_section(('sampler-params', "Sampler Settings"), {
@@ -816,7 +826,7 @@ def get_default_modes():
's_noise': OptionInfo(1.0, "Sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": not native}),
's_min': OptionInfo(0.0, "Sigma min", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": not native}),
's_max': OptionInfo(0.0, "Sigma max", gr.Slider, {"minimum": 0.0, "maximum": 100.0, "step": 1.0, "visible": not native}),
- "schedulers_sep_compvis": OptionInfo("CompVis specific config
", "", gr.HTML, {"visible": not native}),
+ "schedulers_sep_compvis": OptionInfo("CompVis Config
", "", gr.HTML, {"visible": not native}),
'uni_pc_variant': OptionInfo("bh2", "UniPC variant", gr.Radio, {"choices": ["bh1", "bh2", "vary_coeff"], "visible": not native}),
'uni_pc_skip_type': OptionInfo("time_uniform", "UniPC skip type", gr.Radio, {"choices": ["time_uniform", "time_quadratic", "logSNR"], "visible": not native}),
"ddim_discretize": OptionInfo('uniform', "DDIM discretize img2img", gr.Radio, {"choices": ['uniform', 'quad'], "visible": not native}),
@@ -849,7 +859,7 @@ def get_default_modes():
"detailer_unload": OptionInfo(False, "Move detailer model to CPU when complete"),
"detailer_augment": OptionInfo(True, "Detailer use model augment"),
- "postprocessing_sep_face_restore": OptionInfo("Face restore
", "", gr.HTML),
+ "postprocessing_sep_face_restore": OptionInfo("Face Restore
", "", gr.HTML),
"face_restoration_model": OptionInfo("None", "Face restoration", gr.Radio, lambda: {"choices": ['None'] + [x.name() for x in face_restorers]}),
"code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}),
@@ -879,6 +889,15 @@ def get_default_modes():
"deepbooru_filter_tags": OptionInfo("", "Filter out tags from deepbooru output"),
}))
+options_templates.update(options_section(('huggingface', "Huggingface"), {
+ "huggingface_sep": OptionInfo("Huggingface
", "", gr.HTML),
+ "diffuser_cache_config": OptionInfo(True, "Use cached model config when available"),
+ "huggingface_token": OptionInfo('', 'HuggingFace token'),
+ "diffusers_model_load_variant": OptionInfo("default", "Preferred Model variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
+ "diffusers_vae_load_variant": OptionInfo("default", "Preferred VAE variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
+ "custom_diffusers_pipeline": OptionInfo('', 'Load custom Diffusers pipeline'),
+}))
+
options_templates.update(options_section(('extra_networks', "Networks"), {
"extra_networks_sep1": OptionInfo("Networks UI
", "", gr.HTML),
"extra_networks_show": OptionInfo(True, "UI show on startup"),
diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py
index c988c05c4..284109857 100644
--- a/scripts/cogvideo.py
+++ b/scripts/cogvideo.py
@@ -51,7 +51,7 @@ def video_type_change(video_type):
with gr.Row():
video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None')
duration = gr.Slider(label='Duration', minimum=0.25, maximum=30, step=0.25, value=8, visible=False)
- with gr.Accordion('Optional init video', open=False):
+ with gr.Accordion('Optional init image or video', open=False):
with gr.Row():
image = gr.Image(value=None, label='Image', type='pil', source='upload', width=256, height=256)
video = gr.Video(value=None, label='Video', source='upload', width=256, height=256)
@@ -169,25 +169,18 @@ def generate(self, p: processing.StableDiffusionProcessing, model: str):
callback_on_step_end=diffusers_callback,
callback_on_step_end_tensor_inputs=['latents'],
)
- if getattr(p, 'image', False):
- if 'I2V' not in model:
- shared.log.error(f'CogVideoX: model={model} image input not supported')
- return []
- args['image'] = self.image(p, p.image)
- args['num_frames'] = p.frames # only txt2vid has num_frames
- shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXImageToVideoPipeline, shared.sd_model)
- elif getattr(p, 'video', False):
- if 'I2V' in model:
- shared.log.error(f'CogVideoX: model={model} image input not supported')
- return []
- args['video'] = self.video(p, p.video)
- shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXVideoToVideoPipeline, shared.sd_model)
+ if 'I2V' in model:
+ if hasattr(p, 'video') and p.video is not None:
+ args['video'] = self.video(p, p.video)
+ shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXVideoToVideoPipeline, shared.sd_model)
+ elif (hasattr(p, 'image') and p.image is not None) or (hasattr(p, 'init_images') and len(p.init_images) > 0):
+ p.init_images = [p.image] if hasattr(p, 'image') and p.image is not None else p.init_images
+ args['image'] = self.image(p, p.init_images[0])
+ shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXImageToVideoPipeline, shared.sd_model)
else:
- if 'I2V' in model:
- shared.log.error(f'CogVideoX: model={model} image input not supported')
- return []
- args['num_frames'] = p.frames # only txt2vid has num_frames
shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXPipeline, shared.sd_model)
+ args['num_frames'] = p.frames # only txt2vid has num_frames
+ shared.log.info(f'CogVideoX: class={shared.sd_model.__class__.__name__} frames={p.frames} input={args.get("video", None) or args.get("image", None)}')
if debug:
shared.log.debug(f'CogVideoX args: {args}')
frames = shared.sd_model(**args).frames[0]
@@ -199,7 +192,7 @@ def generate(self, p: processing.StableDiffusionProcessing, model: str):
errors.display(e, 'CogVideoX')
t1 = time.time()
its = (len(frames) * p.steps) / (t1 - t0)
- shared.log.info(f'CogVideoX: frames={len(frames)} its={its:.2f} time={t1 - t0:.2f}')
+ shared.log.info(f'CogVideoX: frame={frames[0] if len(frames) > 0 else None} frames={len(frames)} its={its:.2f} time={t1 - t0:.2f}')
return frames
# auto-executed by the script-callback
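
The rewritten branch above selects the CogVideoX pipeline from the model name and the provided inputs; below is a simplified, standalone sketch of that dispatch (the pipeline class names are the diffusers ones, the helper itself is illustrative):

```python
def select_pipeline(model_name, image=None, video=None, init_images=None):
    """Return the diffusers pipeline class name to switch to, or None to keep the current one."""
    init_images = init_images or []
    if 'I2V' in model_name:
        if video is not None:
            return 'CogVideoXVideoToVideoPipeline'
        if image is not None or len(init_images) > 0:
            return 'CogVideoXImageToVideoPipeline'
        return None  # I2V model without an image/video input: pipeline is left unchanged
    return 'CogVideoXPipeline'  # plain txt2vid; the only variant that takes num_frames

print(select_pipeline('CogVideoX-5b'))                       # CogVideoXPipeline
print(select_pipeline('CogVideoX-5b-I2V', image='init.png')) # CogVideoXImageToVideoPipeline
```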
From 944408e93b1cda1266fda6c2f1aeca9b4c30ee75 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 10:39:13 -0500
Subject: [PATCH 097/162] warn on quanto with offload
Signed-off-by: Vladimir Mandic
---
modules/model_quant.py | 7 +++++--
modules/shared.py | 4 ++--
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 9482fe898..03043b33a 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -58,11 +58,11 @@ def load_torchao(msg='', silent=False):
import torchao
ao = torchao
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
- log.debug(f'Quantization: type=quanto version={ao.__version__} fn={fn}') # pylint: disable=protected-access
+ log.debug(f'Quantization: type=torchao version={ao.__version__} fn={fn}') # pylint: disable=protected-access
return ao
except Exception as e:
if len(msg) > 0:
- log.error(f"{msg} failed to import optimum.quanto: {e}")
+ log.error(f"{msg} failed to import torchao: {e}")
ao = None
if not silent:
raise
@@ -92,6 +92,7 @@ def load_bnb(msg='', silent=False):
def load_quanto(msg='', silent=False):
+ from modules import shared
global quanto # pylint: disable=global-statement
if quanto is not None:
return quanto
@@ -101,6 +102,8 @@ def load_quanto(msg='', silent=False):
quanto = optimum_quanto
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
log.debug(f'Quantization: type=quanto version={quanto.__version__} fn={fn}') # pylint: disable=protected-access
+ if shared.opts.diffusers_offload_mode != 'none':
+ shared.log.error(f'Quantization: type=quanto offload={shared.opts.diffusers_offload_mode} not supported')
return quanto
except Exception as e:
if len(msg) > 0:
diff --git a/modules/shared.py b/modules/shared.py
index 3d7571029..17db4595f 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -549,7 +549,7 @@ def get_default_modes():
"diffusers_fuse_projections": OptionInfo(False, "Fused projections"),
"torch_expandable_segments": OptionInfo(False, "Expandable segments"),
"cuda_mem_fraction": OptionInfo(0.0, "Memory limit", gr.Slider, {"minimum": 0, "maximum": 2.0, "step": 0.05}),
- "torch_gc_threshold": OptionInfo(80, "GC threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
+ "torch_gc_threshold": OptionInfo(70, "GC threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
"inference_mode": OptionInfo("no-grad", "Inference mode", gr.Radio, {"choices": ["no-grad", "inference-mode", "none"]}),
"torch_malloc": OptionInfo("native", "Memory allocator", gr.Radio, {"choices": ['native', 'cudaMallocAsync'] }),
@@ -566,7 +566,7 @@ def get_default_modes():
"olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
"ipex_sep": OptionInfo("IPEX
", "", gr.HTML, {"visible": devices.backend == "ipex"}),
- "ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}),
+ "ipex_optimize": OptionInfo([], "IPEX Optimize", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}),
"openvino_sep": OptionInfo("OpenVINO
", "", gr.HTML, {"visible": cmd_opts.use_openvino}),
"openvino_devices": OptionInfo([], "OpenVINO devices to use", gr.CheckboxGroup, {"choices": get_openvino_device_list() if cmd_opts.use_openvino else [], "visible": cmd_opts.use_openvino}), # pylint: disable=E0606
From beea969fd3429d8e78669ac18dee4b2f79b9571b Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 12:34:27 -0500
Subject: [PATCH 098/162] update lora
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 7 ++++---
modules/lora/extra_networks_lora.py | 6 +++++-
modules/lora/networks.py | 19 ++++++++++++-------
modules/shared.py | 18 ++++++++++--------
wiki | 2 +-
5 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5412861a8..8ade15e58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,9 +37,8 @@
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
this results in perceived overhead on generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
thanks @AI-Casanova
- - *note*: LoRA weights backups are required so LoRA can be unapplied, but can take quite a lot of system memory
- if you know you will not need to unapply LoRA, you can disable backups in *settings -> networks -> lora fuse*
- in which case, you need to reload model to unapply LoRA
+ - LoRA weights can either be applied/unapplied on each generate, or weight backups can be stored for later re-use
+ this setting has large performance and resource implications, see [Offload](https://github.com/vladmandic/automatic/wiki/Offload) wiki for details
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
@@ -98,6 +97,8 @@
- fix prompt caching
- fix xyz grid skip final pass
- fix sd upscale script
+- fix cogvideox-i2v
+- lora auto-apply tags remove duplicates
## Update for 2024-11-21
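
The changelog entry above contrasts the two LoRA application strategies: fuse the pre-calculated deltas directly into the weights, or keep CPU backups so the LoRA can be un-applied later. A hedged sketch of that trade-off, using illustrative helpers rather than the modules/lora code:

```python
import torch

def apply_lora(weight: torch.Tensor, delta: torch.Tensor, keep_backup: bool):
    """Fuse a pre-calculated LoRA delta into a layer weight, optionally keeping a CPU backup."""
    backup = weight.detach().clone().cpu() if keep_backup else None  # backups cost system RAM
    with torch.no_grad():
        weight.add_(delta)  # done once per generate instead of on every step
    return backup

def unapply_lora(weight: torch.Tensor, backup):
    if backup is None:
        raise RuntimeError('no backup stored: reload the model to remove the LoRA')
    with torch.no_grad():
        weight.copy_(backup.to(weight.device))
```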
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 135df1ccb..42c4a92f6 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -44,6 +44,8 @@ def prompt(p):
loaded.tags = loaded.tags[:shared.opts.lora_apply_tags]
all_tags.extend(loaded.tags)
if len(all_tags) > 0:
+ all_tags = list(set(all_tags))
+ all_tags = [t for t in all_tags if t not in p.prompt]
shared.log.debug(f"Load network: type=LoRA tags={all_tags} max={shared.opts.lora_apply_tags} apply")
all_tags = ', '.join(all_tags)
p.extra_generation_params["LoRA tags"] = all_tags
@@ -121,13 +123,15 @@ def activate(self, p, params_list, step=0, include=[], exclude=[]):
# shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
self.active = True
self.model = shared.opts.sd_model_checkpoint
+ if 'text_encoder' in include:
+ networks.timer.clear(complete=True)
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
networks.network_activate(include, exclude)
if len(networks.loaded_networks) > 0 and len(networks.applied_layers) > 0 and step == 0:
infotext(p)
prompt(p)
- shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
def deactivate(self, p):
if shared.native and len(networks.diffuser_loaded) > 0:
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index edd82f3e4..ada6f833d 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -42,6 +42,7 @@
# section: load networks from disk
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
+ t0 = time.time()
name = name.replace(".", "_")
shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" detected={network_on_disk.sd_version} method=diffusers scale={lora_scale} fuse={shared.opts.lora_fuse_diffusers}')
if not shared.native:
@@ -67,6 +68,7 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
diffuser_scales.append(lora_scale)
net = network.Network(name, network_on_disk)
net.mtime = os.path.getmtime(network_on_disk.filename)
+ timer.activate += time.time() - t0
return net
@@ -256,10 +258,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
if len(diffuser_loaded) > 0:
shared.log.debug(f'Load network: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
try:
+ t0 = time.time()
shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
if shared.opts.lora_fuse_diffusers:
shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # fuse uses fixed scale since later apply does the scaling
shared.sd_model.unload_lora_weights()
+ timer.activate += time.time() - t0
except Exception as e:
shared.log.error(f'Load network: type=LoRA {e}')
if debug:
@@ -301,16 +305,15 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
if bnb is not None:
with devices.inference_context():
- weights_backup = bnb.functional.dequantize_4bit(weight, quant_state=weight.quant_state, quant_type=weight.quant_type, blocksize=weight.blocksize,)
+ self.network_weights_backup = bnb.functional.dequantize_4bit(weight, quant_state=weight.quant_state, quant_type=weight.quant_type, blocksize=weight.blocksize,)
self.quant_state = weight.quant_state
self.quant_type = weight.quant_type
self.blocksize = weight.blocksize
else:
weights_backup = weight.clone()
- weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weights_backup.to(devices.cpu)
else:
- weights_backup = weight.clone()
- weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weight.clone().to(devices.cpu)
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
@@ -331,7 +334,10 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
- self.to(devices.device)
+ try:
+ self.to(devices.device)
+ except Exception:
+ pass
batch_updown = None
batch_ex_bias = None
for net in loaded_networks:
@@ -501,7 +507,6 @@ def network_deactivate():
def network_activate(include=[], exclude=[]):
t0 = time.time()
- timer.clear(complete=True)
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
@@ -552,7 +557,7 @@ def network_activate(include=[], exclude=[]):
if task is not None and len(applied_layers) == 0:
pbar.remove_task(task) # hide progress bar for no action
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
- timer.activate = time.time() - t0
+ timer.activate += time.time() - t0
if debug and len(loaded_networks) > 0:
shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} components={components} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
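
Because activation now runs twice per generate (once for the text encoders, once for the remaining components), the timer switches from assignment to accumulation; a tiny hedged illustration of why `+=` matters here:

```python
import time

class Timer:
    activate = 0.0

timer = Timer()
for phase in ('text_encoder', 'unet'):
    t0 = time.time()
    time.sleep(0.01)                      # stand-in for network_activate() work
    timer.activate += time.time() - t0    # '=' would keep only the last phase
print(f'activate={timer.activate:.3f}s')  # total across both phases
```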
diff --git a/modules/shared.py b/modules/shared.py
index 17db4595f..256850d21 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -914,22 +914,24 @@ def get_default_modes():
"extra_networks_model_sep": OptionInfo("Models
", "", gr.HTML),
"extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox),
- "extra_networks_embed_sep": OptionInfo("Embeddings
", "", gr.HTML),
- "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD 1.5 embeddings to SDXL ", gr.Checkbox, {"visible": native}),
- "extra_networks_styles_sep": OptionInfo("Styles
", "", gr.HTML),
- "extra_networks_styles": OptionInfo(True, "Show built-in styles"),
- "extra_networks_wildcard_sep": OptionInfo("Wildcards
", "", gr.HTML),
- "wildcards_enabled": OptionInfo(True, "Enable file wildcards support"),
+
"extra_networks_lora_sep": OptionInfo("LoRA
", "", gr.HTML),
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
"lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
- "lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info"),
+ "lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info to metadata"),
"lora_fuse_diffusers": OptionInfo(True, "LoRA fuse directly to model"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA force loading of all models using Diffusers"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA force loading of specific models using Diffusers"),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
- "lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
+ "lora_quant": OptionInfo("NF4","LoRA precision when quantized", gr.Radio, {"choices": ["NF4", "FP4"]}),
+
+ "extra_networks_styles_sep": OptionInfo("Styles
", "", gr.HTML),
+ "extra_networks_styles": OptionInfo(True, "Show built-in styles"),
+ "extra_networks_embed_sep": OptionInfo("Embeddings
", "", gr.HTML),
+ "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD15 embeddings to SDXL ", gr.Checkbox, {"visible": native}),
+ "extra_networks_wildcard_sep": OptionInfo("Wildcards
", "", gr.HTML),
+ "wildcards_enabled": OptionInfo(True, "Enable file wildcards support"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/wiki b/wiki
index 8960da514..95f174900 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 8960da514e9aff4a5d47402925c9498536443379
+Subproject commit 95f1749005d56be490dab95cf92f4ca576d10396
From 8ec1c4f9c4981b33d4ae91dc8a703d21872adef6 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 12:38:19 -0500
Subject: [PATCH 099/162] update bug report
Signed-off-by: Vladimir Mandic
---
.github/ISSUE_TEMPLATE/bug_report.yml | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index cf176d6cf..a40320c63 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -106,10 +106,14 @@ body:
- StableDiffusion 1.5
- StableDiffusion 2.1
- StableDiffusion XL
- - StableDiffusion 3
- - PixArt
+ - StableDiffusion 3.x
- StableCascade
+ - FLUX.1
+ - PixArt
- Kandinsky
+ - Playground
+ - AuraFlow
+ - Any Video Model
- Other
default: 0
validations:
From f4847f1b8a1d4f8f607bd8b39763f4ebf6036c5f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 15:49:20 -0500
Subject: [PATCH 100/162] optimize balanced offload
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 6 ++-
modules/memstats.py | 8 +++-
modules/processing_diffusers.py | 4 +-
modules/processing_vae.py | 6 +--
modules/sd_models.py | 66 ++++++++++++++++++---------------
modules/shared.py | 1 +
scripts/cogvideo.py | 2 +-
7 files changed, 54 insertions(+), 39 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ade15e58..4f6b10909 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,8 +50,10 @@
- **Memory** improvements:
- faster and more compatible *balanced offload* mode
- balanced offload: units are now in percentage instead of bytes
- - balanced offload: add both high and low watermark
- default is 25% for low-watermark (skip offload if memory usage is below 25%) and 70% high-watermark (must offload if memory usage is above 70%)
+ - balanced offload: add both high and low watermark and pinned threshold, defaults as below
+ 25% for low-watermark: skip offload if memory usage is below 25%
+ 70% high-watermark: must offload if memory usage is above 70%
+ 15% pin-watermark: any model component smaller than 15% of total memory is pinned and not offloaded
- change-in-behavior:
low-end systems, triggered by either `lowvram` or by detection of <=4GB will use *sequential offload*
all other systems use *balanced offload* by default (can be changed in settings)
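
The watermark behavior described in the changelog entry above can be summarized in a small sketch; `should_offload`, its arguments and the hard-coded defaults are illustrative only, not part of the patch:

```python
# Hedged sketch of the balanced-offload watermark rules (defaults per the changelog):
# below the low watermark nothing is offloaded, and components smaller than the pin
# threshold stay on the GPU; the high watermark is the hard ceiling enforced by the
# offload hook itself.
def should_offload(used_gpu_fraction: float, component_fraction: float,
                   low: float = 0.25, pin: float = 0.15) -> bool:
    if used_gpu_fraction < low:     # below low watermark: skip offload entirely
        return False
    if component_fraction < pin:    # pinned: small components are never offloaded
        return False
    return True                     # otherwise the component is eligible for offload

print(should_offload(0.60, 0.20))   # True: 60% VRAM used, component is 20% of VRAM
```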
diff --git a/modules/memstats.py b/modules/memstats.py
index fd5f152a0..d43e5bbfa 100644
--- a/modules/memstats.py
+++ b/modules/memstats.py
@@ -4,6 +4,8 @@
from modules import shared, errors
fail_once = False
+mem = {}
+
def gb(val: float):
return round(val / 1024 / 1024 / 1024, 2)
@@ -11,7 +13,7 @@ def gb(val: float):
def memory_stats():
global fail_once # pylint: disable=global-statement
- mem = {}
+ mem.clear()
try:
process = psutil.Process(os.getpid())
res = process.memory_info()
@@ -41,6 +43,10 @@ def memory_stats():
return mem
+def memory_cache():
+ return mem
+
+
def ram_stats():
try:
process = psutil.Process(os.getpid())
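
The memstats change above turns `mem` into a module-level dict that is cleared in place rather than rebound, so the object handed out by `memory_cache()` keeps reflecting the most recent `memory_stats()` call without re-querying psutil or the GPU. A minimal standalone sketch of that pattern, with faked values:

```python
# Standalone illustration of the in-place cache pattern used by memstats above;
# the measurement itself is faked here, the real code fills the dict from psutil/torch.
_mem = {}

def refresh_stats():
    _mem.clear()                   # same dict object, new contents
    _mem['ram'] = {'used': 1.0}    # placeholder measurement
    return _mem

def memory_cache():
    return _mem                    # cheap accessor, no re-measurement

cached = memory_cache()
refresh_stats()
print(cached is memory_cache(), cached['ram'])   # True {'used': 1.0}
```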
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 627eb281f..9f12e44ab 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -349,7 +349,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
def process_decode(p: processing.StableDiffusionProcessing, output):
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model, exclude=['vae'])
if output is not None:
if not hasattr(output, 'images') and hasattr(output, 'frames'):
shared.log.debug(f'Generated: frames={len(output.frames[0])}')
@@ -463,7 +463,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
timer.process.record('decode')
shared.sd_model = orig_pipeline
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ # shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if p.state == '':
global last_p # pylint: disable=global-statement
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 1c4a45f07..77a89c512 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -104,8 +104,6 @@ def full_vae_decode(latents, model):
if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False):
base_device = sd_models.move_base(model, devices.cpu)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
elif shared.opts.diffusers_offload_mode != "sequential":
sd_models.move_model(model.vae, devices.device)
@@ -159,8 +157,8 @@ def full_vae_decode(latents, model):
model.vae.apply(sd_models.convert_to_faketensors)
devices.torch_gc(force=True)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ # if shared.opts.diffusers_offload_mode == "balanced":
+ # shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
elif shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and base_device is not None:
sd_models.move_base(model, base_device)
t1 = time.time()
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 8853916e4..bd69ba45b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -18,7 +18,7 @@
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
from modules.timer import Timer, process as process_timer
-from modules.memstats import memory_stats
+from modules.memstats import memory_stats, memory_cache
from modules.modeldata import model_data
from modules.sd_checkpoint import CheckpointInfo, select_checkpoint, list_models, checkpoints_list, checkpoint_titles, get_closet_checkpoint_match, model_hash, update_model_hashes, setup_model, write_metadata, read_metadata_from_safetensors # pylint: disable=unused-import
@@ -416,9 +416,10 @@ def detach_hook(self, module):
offload_hook_instance = None
+offload_component_map = {}
-def apply_balanced_offload(sd_model):
+def apply_balanced_offload(sd_model, exclude=[]):
global offload_hook_instance # pylint: disable=global-statement
if shared.opts.diffusers_offload_mode != "balanced":
return sd_model
@@ -428,8 +429,6 @@ def apply_balanced_offload(sd_model):
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
return sd_model
- fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
- debug_move(f'Apply offload: type=balanced fn={fn}')
checkpoint_name = sd_model.sd_checkpoint_info.name if getattr(sd_model, "sd_checkpoint_info", None) is not None else None
if checkpoint_name is None:
checkpoint_name = sd_model.__class__.__name__
@@ -442,32 +441,38 @@ def apply_balanced_offload_to_module(pipe):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
keys = get_signature(pipe).keys()
+ keys = [k for k in keys if k not in exclude and not k.startswith('_')]
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- if isinstance(module, torch.nn.Module):
- network_layer_name = getattr(module, "network_layer_name", None)
- device_map = getattr(module, "balanced_offload_device_map", None)
- max_memory = getattr(module, "balanced_offload_max_memory", None)
- module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
- try:
- do_offload = used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__}')
- if do_offload:
- module = module.to(devices.cpu)
- used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
- except Exception as e:
- if 'bitsandbytes' not in str(e):
- shared.log.error(f'Balanced offload: module={module_name} {e}')
- if os.environ.get('SD_MOVE_DEBUG', None):
- errors.display(e, f'Balanced offload: module={module_name}')
- module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
- module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
- module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- if network_layer_name:
- module.network_layer_name = network_layer_name
- if device_map and max_memory:
- module.balanced_offload_device_map = device_map
- module.balanced_offload_max_memory = max_memory
+ if not isinstance(module, torch.nn.Module):
+ continue
+ network_layer_name = getattr(module, "network_layer_name", None)
+ device_map = getattr(module, "balanced_offload_device_map", None)
+ max_memory = getattr(module, "balanced_offload_max_memory", None)
+ module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
+ module_size = offload_component_map.get(module_name, None)
+ if module_size is None:
+ module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
+ offload_component_map[module_name] = module_size
+ do_offload = (used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory) and (module_size > shared.gpu_memory * shared.opts.diffusers_offload_pin_gpu_memory)
+ try:
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__} size={module_size:.3f}')
+ if do_offload and module.device != devices.cpu:
+ module = module.to(devices.cpu)
+ used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
+ except Exception as e:
+ if 'bitsandbytes' not in str(e):
+ shared.log.error(f'Balanced offload: module={module_name} {e}')
+ if os.environ.get('SD_MOVE_DEBUG', None):
+ errors.display(e, f'Balanced offload: module={module_name}')
+ module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
+ module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ if network_layer_name:
+ module.network_layer_name = network_layer_name
+ if device_map and max_memory:
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
@@ -478,7 +483,10 @@ def apply_balanced_offload_to_module(pipe):
apply_balanced_offload_to_module(sd_model.decoder_pipe)
set_accelerate(sd_model)
devices.torch_gc(fast=True)
- process_timer.add('offload', time.time() - t0)
+ t = time.time() - t0
+ process_timer.add('offload', t)
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+ debug_move(f'Apply offload: time={t:.2f} type=balanced fn={fn}')
return sd_model
diff --git a/modules/shared.py b/modules/shared.py
index 256850d21..97c32bd84 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -483,6 +483,7 @@ def get_default_modes():
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_pin_gpu_memory": OptionInfo(0.15, "Balanced offload GPU pin watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py
index 284109857..a5efcd3e6 100644
--- a/scripts/cogvideo.py
+++ b/scripts/cogvideo.py
@@ -180,7 +180,7 @@ def generate(self, p: processing.StableDiffusionProcessing, model: str):
else:
shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXPipeline, shared.sd_model)
args['num_frames'] = p.frames # only txt2vid has num_frames
- shared.log.info(f'CogVideoX: class={shared.sd_model.__class__.__name__} frames={p.frames} input={args.get('video', None) or args.get('image', None)}')
+ shared.log.info(f"CogVideoX: class={shared.sd_model.__class__.__name__} frames={p.frames} input={args.get('video', None) or args.get('image', None)}")
if debug:
shared.log.debug(f'CogVideoX args: {args}')
frames = shared.sd_model(**args).frames[0]
From 9a588d9c91033b162ef1520417993ea7edc5a762 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 12:06:03 -0500
Subject: [PATCH 101/162] update balanced offload
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 7 +++---
modules/devices.py | 54 ++++++++++++++++++++++++--------------------
modules/sd_models.py | 52 ++++++++++++++++++++++++++++--------------
modules/shared.py | 3 +--
wiki | 2 +-
5 files changed, 70 insertions(+), 48 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f6b10909..4b8f4994c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,10 +50,9 @@
- **Memory** improvements:
- faster and more compatible *balanced offload* mode
- balanced offload: units are now in percentage instead of bytes
- - balanced offload: add both high and low watermark and pinned threshold, defaults as below
- 25% for low-watermark: skip offload if memory usage is below 25%
- 70% high-watermark: must offload if memory usage is above 70%
- 15% pin-watermark: any model component smaller than 15% of total memory is pinned and not offloaded
+ - balanced offload: add both high and low watermark, defaults as below
+ `0.25` for low-watermark: skip offload if memory usage is below 25%
+ `0.70` high-watermark: must offload if memory usage is above 70%
- change-in-behavior:
low-end systems, triggered by either `lowvram` or by detection of <=4GB will use *sequential offload*
all other systems use *balanced offload* by default (can be changed in settings)
diff --git a/modules/devices.py b/modules/devices.py
index 71eef5726..51b770481 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -187,27 +187,34 @@ def get_device_for(task): # pylint: disable=unused-argument
def torch_gc(force=False, fast=False):
+ def get_stats():
+ mem_dict = memstats.memory_stats()
+ gpu_dict = mem_dict.get('gpu', {})
+ ram_dict = mem_dict.get('ram', {})
+ oom = gpu_dict.get('oom', 0)
+ ram = ram_dict.get('used', 0)
+ if backend == "directml":
+ gpu = torch.cuda.memory_allocated() / (1 << 30)
+ else:
+ gpu = gpu_dict.get('used', 0)
+ used_gpu = round(100 * gpu / gpu_dict.get('total', 1)) if gpu_dict.get('total', 1) > 1 else 0
+ used_ram = round(100 * ram / ram_dict.get('total', 1)) if ram_dict.get('total', 1) > 1 else 0
+ return gpu, used_gpu, ram, used_ram, oom
+
+ global previous_oom # pylint: disable=global-statement
import gc
from modules import timer, memstats
from modules.shared import cmd_opts
+
t0 = time.time()
- mem = memstats.memory_stats()
- gpu = mem.get('gpu', {})
- ram = mem.get('ram', {})
- oom = gpu.get('oom', 0)
- if backend == "directml":
- used_gpu = round(100 * torch.cuda.memory_allocated() / (1 << 30) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0
- else:
- used_gpu = round(100 * gpu.get('used', 0) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0
- used_ram = round(100 * ram.get('used', 0) / ram.get('total', 1)) if ram.get('total', 1) > 1 else 0
- global previous_oom # pylint: disable=global-statement
+ gpu, used_gpu, ram, used_ram, oom = get_stats()
threshold = 0 if (cmd_opts.lowvram and not cmd_opts.use_zluda) else opts.torch_gc_threshold
collected = 0
if force or threshold == 0 or used_gpu >= threshold or used_ram >= threshold:
force = True
if oom > previous_oom:
previous_oom = oom
- log.warning(f'Torch GPU out-of-memory error: {mem}')
+ log.warning(f'Torch GPU out-of-memory error: {memstats.memory_stats()}')
force = True
if force:
# actual gc
@@ -215,25 +222,24 @@ def torch_gc(force=False, fast=False):
if cuda_ok:
try:
with torch.cuda.device(get_cuda_device_string()):
+ torch.cuda.synchronize()
torch.cuda.empty_cache() # cuda gc
torch.cuda.ipc_collect()
except Exception:
pass
+ else:
+ return gpu, ram
t1 = time.time()
- if 'gc' not in timer.process.records:
- timer.process.records['gc'] = 0
- timer.process.records['gc'] += t1 - t0
- if not force or collected == 0:
- return used_gpu, used_ram
- mem = memstats.memory_stats()
- saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2)
- before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) }
- after = { 'gpu': mem.get('gpu', {}).get('used', 0), 'ram': mem.get('ram', {}).get('used', 0), 'retries': mem.get('retries', 0), 'oom': mem.get('oom', 0) }
- utilization = { 'gpu': used_gpu, 'ram': used_ram, 'threshold': threshold }
- results = { 'collected': collected, 'saved': saved }
+ timer.process.add('gc', t1 - t0)
+
+ new_gpu, new_used_gpu, new_ram, new_used_ram, oom = get_stats()
+ before = { 'gpu': gpu, 'ram': ram }
+ after = { 'gpu': new_gpu, 'ram': new_ram, 'oom': oom }
+ utilization = { 'gpu': new_used_gpu, 'ram': new_used_ram, 'threshold': threshold }
+ results = { 'saved': round(gpu - new_gpu, 2), 'collected': collected }
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
- log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}') # pylint: disable=protected-access
- return used_gpu, used_ram
+ log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}')
+ return new_gpu, new_ram
def set_cuda_sync_mode(mode):
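
The refactored `torch_gc` folds the utilization math into a local `get_stats()` helper; the percentage calculation it relies on (with a guard against an unknown total) looks roughly like this, using made-up numbers:

```python
# Illustration of the utilization calculation factored into get_stats() above;
# the guard avoids dividing by a zero/unknown total reported by memory_stats().
def used_percent(used_gb: float, total_gb: float) -> int:
    return round(100 * used_gb / total_gb) if total_gb > 1 else 0

print(used_percent(6.2, 24.0))   # 26
print(used_percent(6.2, 0.0))    # 0 (total unknown)
```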
diff --git a/modules/sd_models.py b/modules/sd_models.py
index bd69ba45b..85a5dc5d7 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -18,7 +18,7 @@
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
from modules.timer import Timer, process as process_timer
-from modules.memstats import memory_stats, memory_cache
+from modules.memstats import memory_stats
from modules.modeldata import model_data
from modules.sd_checkpoint import CheckpointInfo, select_checkpoint, list_models, checkpoints_list, checkpoint_titles, get_closet_checkpoint_match, model_hash, update_model_hashes, setup_model, write_metadata, read_metadata_from_safetensors # pylint: disable=unused-import
@@ -35,6 +35,8 @@
diffusers_version = int(diffusers.__version__.split('.')[1])
checkpoint_tiles = checkpoint_titles # legacy compatibility
should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']
+offload_hook_instance = None
+offload_component_map = {}
class NoWatermark:
@@ -415,10 +417,6 @@ def detach_hook(self, module):
return module
-offload_hook_instance = None
-offload_component_map = {}
-
-
def apply_balanced_offload(sd_model, exclude=[]):
global offload_hook_instance # pylint: disable=global-statement
if shared.opts.diffusers_offload_mode != "balanced":
@@ -433,6 +431,29 @@ def apply_balanced_offload(sd_model, exclude=[]):
if checkpoint_name is None:
checkpoint_name = sd_model.__class__.__name__
+ def get_pipe_modules(pipe):
+ if hasattr(pipe, "_internal_dict"):
+ modules_names = pipe._internal_dict.keys() # pylint: disable=protected-access
+ else:
+ modules_names = get_signature(pipe).keys()
+ modules_names = [m for m in modules_names if m not in exclude and not m.startswith('_')]
+ modules = {}
+ for module_name in modules_names:
+ module_size = offload_component_map.get(module_name, None)
+ if module_size is None:
+ module = getattr(pipe, module_name, None)
+ if not isinstance(module, torch.nn.Module):
+ continue
+ try:
+ module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
+ except Exception as e:
+ shared.log.error(f'Balanced offload: module={module_name} {e}')
+ module_size = 0
+ offload_component_map[module_name] = module_size
+ modules[module_name] = module_size
+ modules = sorted(modules.items(), key=lambda x: x[1], reverse=True)
+ return modules
+
def apply_balanced_offload_to_module(pipe):
used_gpu, used_ram = devices.torch_gc(fast=True)
if hasattr(pipe, "pipe"):
@@ -442,24 +463,20 @@ def apply_balanced_offload_to_module(pipe):
else:
keys = get_signature(pipe).keys()
keys = [k for k in keys if k not in exclude and not k.startswith('_')]
- for module_name in keys: # pylint: disable=protected-access
+ for module_name, module_size in get_pipe_modules(pipe): # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- if not isinstance(module, torch.nn.Module):
- continue
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
- module_size = offload_component_map.get(module_name, None)
- if module_size is None:
- module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
- offload_component_map[module_name] = module_size
- do_offload = (used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory) and (module_size > shared.gpu_memory * shared.opts.diffusers_offload_pin_gpu_memory)
+ perc_gpu = used_gpu / shared.gpu_memory
try:
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__} size={module_size:.3f}')
- if do_offload and module.device != devices.cpu:
- module = module.to(devices.cpu)
- used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
+ prev_gpu = used_gpu
+ do_offload = (perc_gpu > shared.opts.diffusers_offload_min_gpu_memory) and (module.device != devices.cpu)
+ if do_offload:
+ module = module.to(devices.cpu, non_blocking=True)
+ used_gpu -= module_size
+ debug_move(f'Balanced offload: op={"move" if do_offload else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} component={module.__class__.__name__} size={module_size:.3f}')
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
@@ -473,6 +490,7 @@ def apply_balanced_offload_to_module(pipe):
if device_map and max_memory:
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
+ devices.torch_gc(fast=True, force=True)
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
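
`get_pipe_modules` above sizes every pipeline component once, caches the result in `offload_component_map`, and iterates components largest-first so the biggest savings come early. A hedged sketch of that sizing step using a fake pipeline (the class and layer sizes are placeholders, not a real diffusers pipeline):

```python
# Sketch of the per-component sizing performed by get_pipe_modules() above;
# FakePipe stands in for a diffusers pipeline and the layers are tiny placeholders.
import torch

class FakePipe:
    def __init__(self):
        self.unet = torch.nn.Linear(512, 512)
        self.text_encoder = torch.nn.Linear(64, 64)

def component_sizes(pipe, names):
    sizes = {}
    for name in names:
        module = getattr(pipe, name, None)
        if not isinstance(module, torch.nn.Module):
            continue
        # parameter bytes converted to GB, as in the patch
        sizes[name] = sum(p.numel() * p.element_size() for p in module.parameters(recurse=True)) / 1024**3
    return sorted(sizes.items(), key=lambda x: x[1], reverse=True)

print(component_sizes(FakePipe(), ['unet', 'text_encoder']))   # largest component first
```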
diff --git a/modules/shared.py b/modules/shared.py
index 97c32bd84..01add6d60 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -483,8 +483,7 @@ def get_default_modes():
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_pin_gpu_memory": OptionInfo(0.15, "Balanced offload GPU pin watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False }),
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
diff --git a/wiki b/wiki
index 95f174900..db828893c 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 95f1749005d56be490dab95cf92f4ca576d10396
+Subproject commit db828893c803f1d5d0180cfe09689884bf27af2d
From c3b0c0a3bfb96032a65a41db91d1867feb934c02 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 12:32:34 -0500
Subject: [PATCH 102/162] add SD_NO_CACHE env variable
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
modules/files_cache.py | 8 ++++----
modules/shared.py | 4 +++-
3 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b8f4994c..f215b84ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -83,6 +83,7 @@
### Fixes
+- add `SD_NO_CACHE=true` env variable to disable file/folder caching
- update `diffusers`
- fix README links
- fix sdxl controlnet single-file loader
diff --git a/modules/files_cache.py b/modules/files_cache.py
index fa2241afc..d65e0f4f4 100644
--- a/modules/files_cache.py
+++ b/modules/files_cache.py
@@ -6,6 +6,7 @@
from installer import log
+do_cache_folders = os.environ.get('SD_NO_CACHE', None) is None
class Directory: # forward declaration
...
@@ -87,8 +88,6 @@ def is_stale(self) -> bool:
return not self.is_directory or self.mtime != self.live_mtime
-
-
class DirectoryCache(UserDict, DirectoryCollection):
def __delattr__(self, directory_path: str) -> None:
directory: Directory = get_directory(directory_path, fetch=False)
@@ -126,7 +125,7 @@ def clean_directory(directory: Directory, /, recursive: RecursiveType=False) ->
return is_clean
-def get_directory(directory_or_path: str, /, fetch:bool=True) -> Union[Directory, None]:
+def get_directory(directory_or_path: str, /, fetch: bool=True) -> Union[Directory, None]:
if isinstance(directory_or_path, Directory):
if directory_or_path.is_directory:
return directory_or_path
@@ -136,8 +135,9 @@ def get_directory(directory_or_path: str, /, fetch:bool=True) -> Union[Directory
if not cache_folders.get(directory_or_path, None):
if fetch:
directory = fetch_directory(directory_path=directory_or_path)
- if directory:
+ if directory and do_cache_folders:
cache_folders[directory_or_path] = directory
+ return directory
else:
clean_directory(cache_folders[directory_or_path])
return cache_folders[directory_or_path] if directory_or_path in cache_folders else None
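
The `SD_NO_CACHE` switch added above only gates whether fetched directories are stored back into the cache; a minimal illustration of the gate (the launch command in the comment is just an example):

```python
# Illustration of the SD_NO_CACHE gate from files_cache.py above: folder caching stays
# enabled unless the variable is set to any value in the environment.
import os

do_cache_folders = os.environ.get('SD_NO_CACHE', None) is None

# e.g. run with caching disabled (shell): SD_NO_CACHE=true python launch.py
print('folder caching enabled:', do_cache_folders)
```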
diff --git a/modules/shared.py b/modules/shared.py
index 01add6d60..f383ee2b6 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -16,7 +16,7 @@
import orjson
import diffusers
from rich.console import Console
-from modules import errors, devices, shared_items, shared_state, cmd_args, theme, history
+from modules import errors, devices, shared_items, shared_state, cmd_args, theme, history, files_cache
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
@@ -238,6 +238,8 @@ def default(obj):
mem_stat = memory_stats()
gpu_memory = mem_stat['gpu']['total'] if "gpu" in mem_stat else 0
native = backend == Backend.DIFFUSERS
+if not files_cache.do_cache_folders:
+ log.warning('File cache disabled: ')
class OptionInfo:
From 8cea43f7db8099e3de909e0ffce5280906c7e922 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 13:10:05 -0500
Subject: [PATCH 103/162] lora add error handler for partial offload
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index ada6f833d..5d285af95 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -346,7 +346,10 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
continue
try:
t0 = time.time()
- weight = self.weight.to(devices.device)
+ try:
+ weight = self.weight.to(devices.device)
+ except Exception:
+ weight = self.weight
updown, ex_bias = module.calc_updown(weight)
if batch_updown is not None and updown is not None:
batch_updown += updown.to(batch_updown.device)
@@ -389,7 +392,10 @@ def network_apply_direct(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if updown is not None:
if deactivate:
updown *= -1
- new_weight = self.weight.to(devices.device) + updown.to(devices.device)
+ try:
+ new_weight = self.weight.to(devices.device) + updown.to(devices.device)
+ except Exception:
+ new_weight = self.weight + updown
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
From e9f951b2c503f85c21d568e469a15f54b51fcbd1 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 14:20:01 -0500
Subject: [PATCH 104/162] offload logging
Signed-off-by: Vladimir Mandic
---
TODO.md | 5 +++
modules/devices.py | 2 ++
modules/processing_diffusers.py | 1 +
modules/sd_models.py | 55 ++++++++++++++++++++++-----------
modules/shared.py | 2 +-
5 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/TODO.md b/TODO.md
index 90372e41f..9692b7635 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,6 +2,11 @@
Main ToDo list can be found at [GitHub projects](https://github.com/users/vladmandic/projects)
+## Pending
+
+- LoRA direct with caching
+- Previewer issues
+
## Future Candidates
- SD35 IPAdapter:
diff --git a/modules/devices.py b/modules/devices.py
index 51b770481..3f1439fb7 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -231,6 +231,8 @@ def get_stats():
return gpu, ram
t1 = time.time()
timer.process.add('gc', t1 - t0)
+ if fast:
+ return gpu, ram
new_gpu, new_used_gpu, new_ram, new_used_ram, oom = get_stats()
before = { 'gpu': gpu, 'ram': ram }
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 9f12e44ab..3c59bbcf7 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -458,6 +458,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
extra_networks.deactivate(p)
timer.process.add('lora', networks.timer.total)
+ networks.timer.clear(complete=True)
results = process_decode(p, output)
timer.process.record('decode')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 85a5dc5d7..c39c0263e 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -36,7 +36,6 @@
checkpoint_tiles = checkpoint_titles # legacy compatibility
should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']
offload_hook_instance = None
-offload_component_map = {}
class NoWatermark:
@@ -367,11 +366,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if shared.opts.diffusers_offload_mode == "balanced":
- try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} watermarks low={shared.opts.diffusers_offload_min_gpu_memory} high={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction:.2f}')
- sd_model = apply_balanced_offload(sd_model)
- except Exception as e:
- shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
+ sd_model = apply_balanced_offload(sd_model)
process_timer.add('offload', time.time() - t0)
@@ -386,11 +381,30 @@ def __init__(self):
self.cpu_watermark = shared.opts.diffusers_offload_max_cpu_memory
self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
- gpu_dict = { "min": self.min_watermark, "max": self.max_watermark, "bytes": self.gpu }
- cpu_dict = { "max": self.cpu_watermark, "bytes": self.cpu }
- shared.log.info(f'Init offload: type=balanced gpu={gpu_dict} cpu={cpu_dict}')
+ self.offload_map = {}
+ gpu = f'{shared.gpu_memory * shared.opts.diffusers_offload_min_gpu_memory:.3f}-{shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory}:{shared.gpu_memory}'
+ shared.log.info(f'Offload: type=balanced op=init watermark={self.min_watermark}-{self.max_watermark} gpu={gpu} cpu={shared.cpu_memory:.3f} limit={shared.opts.cuda_mem_fraction:.2f}')
+ self.validate()
super().__init__()
+ def validate(self):
+ if shared.opts.diffusers_offload_mode != 'balanced':
+ return
+ if shared.opts.diffusers_offload_min_gpu_memory < 0 or shared.opts.diffusers_offload_min_gpu_memory > 1:
+ shared.opts.diffusers_offload_min_gpu_memory = 0.25
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark low={shared.opts.diffusers_offload_min_gpu_memory} invalid value')
+ if shared.opts.diffusers_offload_max_gpu_memory < 0.1 or shared.opts.diffusers_offload_max_gpu_memory > 1:
+ shared.opts.diffusers_offload_max_gpu_memory = 0.75
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark high={shared.opts.diffusers_offload_max_gpu_memory} invalid value')
+ if shared.opts.diffusers_offload_min_gpu_memory > shared.opts.diffusers_offload_max_gpu_memory:
+ shared.opts.diffusers_offload_min_gpu_memory = shared.opts.diffusers_offload_max_gpu_memory
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark low={shared.opts.diffusers_offload_min_gpu_memory} reset')
+ if shared.opts.diffusers_offload_max_gpu_memory * shared.gpu_memory < 4:
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark high={shared.opts.diffusers_offload_max_gpu_memory} low memory')
+
+ def model_size(self):
+ return sum(self.offload_map.values())
+
def init_hook(self, module):
return module
@@ -421,12 +435,14 @@ def apply_balanced_offload(sd_model, exclude=[]):
global offload_hook_instance # pylint: disable=global-statement
if shared.opts.diffusers_offload_mode != "balanced":
return sd_model
- if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
- offload_hook_instance = OffloadHook()
t0 = time.time()
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
return sd_model
+ cached = True
+ if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
+ cached = False
+ offload_hook_instance = OffloadHook()
checkpoint_name = sd_model.sd_checkpoint_info.name if getattr(sd_model, "sd_checkpoint_info", None) is not None else None
if checkpoint_name is None:
checkpoint_name = sd_model.__class__.__name__
@@ -439,7 +455,7 @@ def get_pipe_modules(pipe):
modules_names = [m for m in modules_names if m not in exclude and not m.startswith('_')]
modules = {}
for module_name in modules_names:
- module_size = offload_component_map.get(module_name, None)
+ module_size = offload_hook_instance.offload_map.get(module_name, None)
if module_size is None:
module = getattr(pipe, module_name, None)
if not isinstance(module, torch.nn.Module):
@@ -447,9 +463,9 @@ def get_pipe_modules(pipe):
try:
module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
except Exception as e:
- shared.log.error(f'Balanced offload: module={module_name} {e}')
+ shared.log.error(f'Offload: type=balanced op=calc module={module_name} {e}')
module_size = 0
- offload_component_map[module_name] = module_size
+ offload_hook_instance.offload_map[module_name] = module_size
modules[module_name] = module_size
modules = sorted(modules.items(), key=lambda x: x[1], reverse=True)
return modules
@@ -476,12 +492,12 @@ def apply_balanced_offload_to_module(pipe):
if do_offload:
module = module.to(devices.cpu, non_blocking=True)
used_gpu -= module_size
- debug_move(f'Balanced offload: op={"move" if do_offload else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} component={module.__class__.__name__} size={module_size:.3f}')
+ debug_move(f'Offload: type=balanced op={"move" if do_offload else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} component={module.__class__.__name__} size={module_size:.3f}')
except Exception as e:
if 'bitsandbytes' not in str(e):
- shared.log.error(f'Balanced offload: module={module_name} {e}')
+ shared.log.error(f'Offload: type=balanced op=apply module={module_name} {e}')
if os.environ.get('SD_MOVE_DEBUG', None):
- errors.display(e, f'Balanced offload: module={module_name}')
+ errors.display(e, f'Offload: type=balanced op=apply module={module_name}')
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
@@ -505,6 +521,8 @@ def apply_balanced_offload_to_module(pipe):
process_timer.add('offload', t)
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
debug_move(f'Apply offload: time={t:.2f} type=balanced fn={fn}')
+ if not cached:
+ shared.log.info(f'Offload: type=balanced op=apply class={sd_model.__class__.__name__} modules={len(offload_hook_instance.offload_map)} size={offload_hook_instance.model_size():.3f}')
return sd_model
@@ -1000,7 +1018,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
shared.log.error(f"Load {op}: {e}")
errors.display(e, "Model")
- devices.torch_gc(force=True)
+ if shared.opts.diffusers_offload_mode != 'balanced':
+ devices.torch_gc(force=True)
if sd_model is not None:
script_callbacks.model_loaded_callback(sd_model)
diff --git a/modules/shared.py b/modules/shared.py
index f383ee2b6..eaf4b361d 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -484,7 +484,7 @@ def get_default_modes():
"diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible", gr.Checkbox, {"visible": False }),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0.1, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False }),
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
From 8f21e96f73e4149d2772582a843d4ed84a1fd029 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 15:22:51 -0500
Subject: [PATCH 105/162] update bnb and increase ui timeouts
Signed-off-by: Vladimir Mandic
---
TODO.md | 1 +
installer.py | 6 +++---
javascript/logger.js | 10 ++++++----
modules/model_quant.py | 4 ++--
modules/sd_samplers_common.py | 3 ++-
5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/TODO.md b/TODO.md
index 9692b7635..76c672260 100644
--- a/TODO.md
+++ b/TODO.md
@@ -6,6 +6,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- LoRA direct with caching
- Previewer issues
+- Redesign postprocessing
## Future Candidates
diff --git a/installer.py b/installer.py
index 93cd10413..b020418e1 100644
--- a/installer.py
+++ b/installer.py
@@ -682,7 +682,7 @@ def install_torch_addons():
if opts.get('nncf_compress_weights', False) and not args.use_openvino:
install('nncf==2.7.0', 'nncf')
if opts.get('optimum_quanto_weights', False):
- install('optimum-quanto', 'optimum-quanto')
+ install('optimum-quanto==0.2.6', 'optimum-quanto')
if triton_command is not None:
install(triton_command, 'triton', quiet=True)
@@ -999,8 +999,8 @@ def install_optional():
install('basicsr')
install('gfpgan')
install('clean-fid')
- install('optimum-quanto', ignore=True)
- install('bitsandbytes', ignore=True)
+ install('optimum-quanto==0.2.6', ignore=True)
+ install('bitsandbytes==0.45.0', ignore=True)
install('pynvml', ignore=True)
install('ultralytics==8.3.40', ignore=True)
install('Cython', ignore=True)
diff --git a/javascript/logger.js b/javascript/logger.js
index 1677fa537..8fa812b86 100644
--- a/javascript/logger.js
+++ b/javascript/logger.js
@@ -1,3 +1,5 @@
+const timeout = 10000;
+
const log = async (...msg) => {
const dt = new Date();
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
@@ -21,7 +23,7 @@ const error = async (...msg) => {
// if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
};
-const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
+const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = timeout) => {
const err = (msg) => {
if (!ignore) {
error(`${msg}: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
@@ -30,7 +32,7 @@ const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined
};
xhrObj.setRequestHeader('Content-Type', 'application/json');
- xhrObj.timeout = serverTimeout;
+ xhrObj.timeout = timeout;
xhrObj.ontimeout = () => err('xhr.ontimeout');
xhrObj.onerror = () => err('xhr.onerror');
xhrObj.onabort = () => err('xhr.onabort');
@@ -52,14 +54,14 @@ const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined
xhrObj.send(req);
};
-const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
+const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = timeout) => {
const xhr = new XMLHttpRequest();
const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
xhr.open('GET', `${url}?${args}`, true);
xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
};
-function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) {
+function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = timeout) {
const xhr = new XMLHttpRequest();
xhr.open('POST', url, true);
xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 03043b33a..5c0b40080 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -73,7 +73,7 @@ def load_bnb(msg='', silent=False):
global bnb # pylint: disable=global-statement
if bnb is not None:
return bnb
- install('bitsandbytes', quiet=True)
+ install('bitsandbytes==0.45.0', quiet=True)
try:
import bitsandbytes
bnb = bitsandbytes
@@ -96,7 +96,7 @@ def load_quanto(msg='', silent=False):
global quanto # pylint: disable=global-statement
if quanto is not None:
return quanto
- install('optimum-quanto', quiet=True)
+ install('optimum-quanto==0.2.6', quiet=True)
try:
from optimum import quanto as optimum_quanto # pylint: disable=no-name-in-module
quanto = optimum_quanto
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index a96795a25..cd51043c7 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -51,6 +51,7 @@ def single_sample_to_image(sample, approximation=None):
return Image.new(mode="RGB", size=(512, 512))
if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent
sample = sample.permute(1, 0, 2, 3)[0]
+ """
# TODO remove
if shared.native: # [-x,x] to [-5,5]
sample_max = torch.max(sample)
@@ -59,7 +60,7 @@ def single_sample_to_image(sample, approximation=None):
sample_min = torch.min(sample)
if sample_min < -5:
sample = sample * (5 / abs(sample_min))
-
+ """
if approximation == 2: # TAESD
x_sample = sd_vae_taesd.decode(sample)
x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range
From bd540efb73177f46b59e2c263bbcb378e9ea7d93 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 18:10:51 -0500
Subject: [PATCH 106/162] lora: absolute path, hf download, flux controlnet
loras
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 9 +++-
TODO.md | 1 +
installer.py | 2 +-
modules/control/run.py | 3 +-
modules/control/units/controlnet.py | 73 ++++++++++++++++++++++-------
modules/lora/networks.py | 16 +++++++
modules/processing_diffusers.py | 4 +-
requirements.txt | 2 +-
8 files changed, 87 insertions(+), 23 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f215b84ec..2ecc7ef26 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-10
+## Update for 2024-12-11
### New models and integrations
@@ -22,6 +22,9 @@
*recommended*: guidance scale 30
- [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
*recommended*: guidance scale 10
+- [Flux ControlNet LoRA](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev-lora)
+ alternative to standard ControlNets, FLUX.1 also allows LoRA to help guide the generation process
+ both **Depth** and **Canny** LoRAs are available in standard control menus
- [StabilityAI SD35 ControlNets]([sd3_medium](https://huggingface.co/stabilityai/stable-diffusion-3.5-controlnets))
- In addition to previously released `InstantX` and `Alimama`, we now have *official* ones from StabilityAI
- [Style Aligned Image Generation](https://style-aligned-gen.github.io/)
@@ -39,6 +42,10 @@
thanks @AI-Casanova
- LoRA weights can be applied/unapplied as on each generate or they can store weights backups for later use
this setting has large performance and resource implications, see [Offload](https://github.com/vladmandic/automatic/wiki/Offload) wiki for details
+ - LoRA name in prompt can now also be an absolute path to a LoRA file, even if LoRA is not indexed
+ example: ``
+ - LoRA name in prompt can now also be a path to a LoRA file on `huggingface`
+ example: ``
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
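
As a hypothetical illustration of the two new LoRA reference forms above (the paths and repo are invented, and the `<lora:name:multiplier>` syntax is assumed from the existing prompt format):

```python
# Hypothetical prompts only; paths and repo names are made up for illustration.
prompt_local = "a photo of a cat <lora:/mnt/models/Lora/my-style.safetensors:1.0>"
prompt_hf = "a photo of a cat <lora:/huggingface.co/some-org/some-repo/my-style.safetensors:1.0>"
```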
diff --git a/TODO.md b/TODO.md
index 76c672260..63088d39f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,6 +17,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- SANA:
- LTX-Video:
- TorchAO:
+- ControlNetUnion/ControlNetPromax:
## Other
diff --git a/installer.py b/installer.py
index b020418e1..36254b66a 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '3335e2262d47e7d7e311a44dea7f454b5f01b643' # diffusers commit hash
+ sha = '914a585be8187ec0ad92fab4f072c992f8c297cd' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/control/run.py b/modules/control/run.py
index 2fe13dd73..6ae7fb20c 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -332,7 +332,7 @@ def set_pipe():
p.task_args['control_guidance_start'] = control_guidance_start
p.task_args['control_guidance_end'] = control_guidance_end
p.task_args['guess_mode'] = p.guess_mode
- instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model)
+ instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
pipe = instance.pipeline
elif unit_type == 'xs' and has_models:
p.extra_generation_params["Control mode"] = 'ControlNet-XS'
@@ -370,7 +370,6 @@ def set_pipe():
debug(f'Control: run type={unit_type} models={has_models} pipe={pipe.__class__.__name__ if pipe is not None else None}')
return pipe
-
pipe = set_pipe()
debug(f'Control pipeline: class={pipe.__class__.__name__} args={vars(p)}')
t1, t2, t3 = time.time(), 0, 0
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 3f68a4896..3fa2d90eb 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -5,6 +5,7 @@
from modules.control.units import detect
from modules.shared import log, opts, listdir
from modules import errors, sd_models, devices, model_quant
+from modules.processing import StableDiffusionProcessingControl
what = 'ControlNet'
@@ -75,6 +76,8 @@
"InstantX Union": 'InstantX/FLUX.1-dev-Controlnet-Union',
"InstantX Canny": 'InstantX/FLUX.1-dev-Controlnet-Canny',
"JasperAI Depth": 'jasperai/Flux.1-dev-Controlnet-Depth',
+ "BlackForrestLabs Canny LoRA": '/huggingface.co/black-forest-labs/FLUX.1-Canny-dev-lora/flux1-canny-dev-lora.safetensors',
+ "BlackForrestLabs Depth LoRA": '/huggingface.co/black-forest-labs/FLUX.1-Depth-dev-lora/flux1-depth-dev-lora.safetensors',
"JasperAI Surface Normals": 'jasperai/Flux.1-dev-Controlnet-Surface-Normals',
"JasperAI Upscaler": 'jasperai/Flux.1-dev-Controlnet-Upscaler',
"Shakker-Labs Union": 'Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro',
@@ -162,12 +165,21 @@ def reset(self):
self.model = None
self.model_id = None
- def get_class(self):
+ def get_class(self, model_id:str=''):
import modules.shared
if modules.shared.sd_model_type == 'sd':
from diffusers import ControlNetModel as cls # pylint: disable=reimported
config = 'lllyasviel/control_v11p_sd15_canny'
elif modules.shared.sd_model_type == 'sdxl':
+ # TODO ControlNetUnion
+ """
+ if 'union' in model_id.lower():
+ from diffusers import ControlNetUnionModel as cls
+ config = 'xinsir/controlnet-union-sdxl-1.0'
+ else:
+ from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
+ config = 'Eugeoter/noob-sdxl-controlnet-canny'
+ """
from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
config = 'Eugeoter/noob-sdxl-controlnet-canny'
elif modules.shared.sd_model_type == 'f1':
@@ -181,7 +193,7 @@ def get_class(self):
return None, None
return cls, config
- def load_safetensors(self, model_path):
+ def load_safetensors(self, model_id, model_path):
name = os.path.splitext(model_path)[0]
config_path = None
if not os.path.exists(model_path):
@@ -206,7 +218,7 @@ def load_safetensors(self, model_path):
config_path = f'{name}.json'
if config_path is not None:
self.load_config['original_config_file '] = config_path
- cls, config = self.get_class()
+ cls, config = self.get_class(model_id)
if cls is None:
log.error(f'Control {what} model load failed: unknown base model')
else:
@@ -228,18 +240,21 @@ def load(self, model_id: str = None, force: bool = True) -> str:
if model_path is None:
log.error(f'Control {what} model load failed: id="{model_id}" error=unknown model id')
return
+ if 'lora' in model_id.lower():
+ self.model = model_path
+ return
if model_id == self.model_id and not force:
log.debug(f'Control {what} model: id="{model_id}" path="{model_path}" already loaded')
return
log.debug(f'Control {what} model loading: id="{model_id}" path="{model_path}"')
+ cls, _config = self.get_class(model_id)
if model_path.endswith('.safetensors'):
- self.load_safetensors(model_path)
+ self.load_safetensors(model_id, model_path)
else:
kwargs = {}
if '/bin' in model_path:
model_path = model_path.replace('/bin', '')
self.load_config['use_safetensors'] = False
- cls, _config = self.get_class()
if cls is None:
log.error(f'Control {what} model load failed: id="{model_id}" unknown base model')
return
@@ -271,7 +286,7 @@ def load(self, model_id: str = None, force: bool = True) -> str:
self.model.to(self.device)
t1 = time.time()
self.model_id = model_id
- log.debug(f'Control {what} model loaded: id="{model_id}" path="{model_path}" time={t1-t0:.2f}')
+ log.debug(f'Control {what} model loaded: id="{model_id}" path="{model_path}" cls={cls.__name__} time={t1-t0:.2f}')
return f'{what} loaded model: {model_id}'
except Exception as e:
log.error(f'Control {what} model load failed: id="{model_id}" error={e}')
@@ -284,16 +299,30 @@ def __init__(self,
controlnet: Union[ControlNetModel, list[ControlNetModel]],
pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline, FluxPipeline, StableDiffusion3Pipeline],
dtype = None,
+ p: StableDiffusionProcessingControl = None,
):
t0 = time.time()
self.orig_pipeline = pipeline
self.pipeline = None
+
+ controlnets = controlnet if isinstance(controlnet, list) else [controlnet]
+ loras = [cn for cn in controlnets if isinstance(cn, str)]
+ controlnets = [cn for cn in controlnets if not isinstance(cn, str)]
+
if pipeline is None:
log.error('Control model pipeline: model not loaded')
return
- elif detect.is_sdxl(pipeline):
- from diffusers import StableDiffusionXLControlNetPipeline
- self.pipeline = StableDiffusionXLControlNetPipeline(
+ elif detect.is_sdxl(pipeline) and len(controlnets) > 0:
+ from diffusers import StableDiffusionXLControlNetPipeline, StableDiffusionXLControlNetUnionPipeline
+ # TODO ControlNetUnion
+ """
+ if controlnet.__class__.__name__ == 'ControlNetUnionModel':
+ cls = StableDiffusionXLControlNetUnionPipeline
+ else:
+ cls = StableDiffusionXLControlNetPipeline
+ """
+ cls = StableDiffusionXLControlNetPipeline
+ self.pipeline = cls(
vae=pipeline.vae,
text_encoder=pipeline.text_encoder,
text_encoder_2=pipeline.text_encoder_2,
@@ -302,9 +331,9 @@ def __init__(self,
unet=pipeline.unet,
scheduler=pipeline.scheduler,
feature_extractor=getattr(pipeline, 'feature_extractor', None),
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
- elif detect.is_sd15(pipeline):
+ elif detect.is_sd15(pipeline) and len(controlnets) > 0:
from diffusers import StableDiffusionControlNetPipeline
self.pipeline = StableDiffusionControlNetPipeline(
vae=pipeline.vae,
@@ -315,10 +344,10 @@ def __init__(self,
feature_extractor=getattr(pipeline, 'feature_extractor', None),
requires_safety_checker=False,
safety_checker=None,
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
sd_models.move_model(self.pipeline, pipeline.device)
- elif detect.is_f1(pipeline):
+ elif detect.is_f1(pipeline) and len(controlnets) > 0:
from diffusers import FluxControlNetPipeline
self.pipeline = FluxControlNetPipeline(
vae=pipeline.vae.to(devices.device),
@@ -328,9 +357,9 @@ def __init__(self,
tokenizer_2=pipeline.tokenizer_2,
transformer=pipeline.transformer,
scheduler=pipeline.scheduler,
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
- elif detect.is_sd3(pipeline):
+ elif detect.is_sd3(pipeline) and len(controlnets) > 0:
from diffusers import StableDiffusion3ControlNetPipeline
self.pipeline = StableDiffusion3ControlNetPipeline(
vae=pipeline.vae,
@@ -342,8 +371,18 @@ def __init__(self,
tokenizer_3=pipeline.tokenizer_3,
transformer=pipeline.transformer,
scheduler=pipeline.scheduler,
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
+ elif len(loras) > 0:
+ self.pipeline = pipeline
+ for lora in loras:
+ log.debug(f'Control {what} pipeline: lora="{lora}"')
+ lora = lora.replace('/huggingface.co/', '')
+ self.pipeline.load_lora_weights(lora)
+ """
+ if p is not None:
+ p.prompt += f''
+ """
else:
log.error(f'Control {what} pipeline: class={pipeline.__class__.__name__} unsupported model type')
return
@@ -353,6 +392,7 @@ def __init__(self,
return
if dtype is not None:
self.pipeline = self.pipeline.to(dtype)
+
if opts.diffusers_offload_mode == 'none':
sd_models.move_model(self.pipeline, devices.device)
from modules.sd_models import set_diffuser_offload
@@ -362,5 +402,6 @@ def __init__(self,
log.debug(f'Control {what} pipeline: class={self.pipeline.__class__.__name__} time={t1-t0:.2f}')
def restore(self):
+ self.pipeline.unload_lora_weights()
self.pipeline = None
return self.orig_pipeline
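The list-splitting at the top of the updated ControlNetPipeline.__init__ is what allows mixing real ControlNet models with LoRA references passed as strings; below is a minimal, self-contained sketch of that split, where the stand-in class and the path are illustrative placeholders, not real models:

# minimal sketch of the controlnet/lora split above; FakeControlNetModel and the path are placeholders
class FakeControlNetModel:
    pass

controlnet = [FakeControlNetModel(), '/huggingface.co/some-user/some-control-lora/lora.safetensors']
controlnets = controlnet if isinstance(controlnet, list) else [controlnet]
loras = [cn for cn in controlnets if isinstance(cn, str)]              # string entries are treated as LoRA references
controlnets = [cn for cn in controlnets if not isinstance(cn, str)]    # model objects go to the controlnet pipeline
for lora in loras:
    lora = lora.replace('/huggingface.co/', '')                        # host prefix is stripped before load_lora_weights
print(len(controlnets), loras)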
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 5d285af95..a38945072 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -207,11 +207,27 @@ def add_network(filename):
shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
+def network_download(name):
+ from huggingface_hub import hf_hub_download
+ if os.path.exists(name):
+ return network.NetworkOnDisk(name, name)
+ parts = name.split('/')
+ if len(parts) >= 5 and parts[1] == 'huggingface.co':
+ repo_id = f'{parts[2]}/{parts[3]}'
+ filename = '/'.join(parts[4:])
+ fn = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=shared.opts.hfcache_dir)
+ return network.NetworkOnDisk(name, fn)
+ return None
+
+
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
+ for i in range(len(names)):
+ if names[i].startswith('/'):
+ networks_on_disk[i] = network_download(names[i])
failed_to_load_networks = []
recompile_model = maybe_recompile_model(names, te_multipliers)
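A self-contained sketch of the path parsing network_download performs before handing off to hf_hub_download; the repo and filename below are illustrative placeholders:

name = '/huggingface.co/some-user/some-lora-repo/sdxl/lora.safetensors'   # hypothetical input path
parts = name.split('/')
if len(parts) >= 5 and parts[1] == 'huggingface.co':
    repo_id = f'{parts[2]}/{parts[3]}'     # 'some-user/some-lora-repo'
    filename = '/'.join(parts[4:])         # 'sdxl/lora.safetensors'
    print(repo_id, filename)               # these are the values passed to hf_hub_download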
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 3c59bbcf7..3b6f228ba 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -375,10 +375,10 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
elif hasattr(output, 'images'):
results = output.images
else:
- shared.log.warning('Processing returned no results')
+ shared.log.warning('Processing: no results')
results = []
else:
- shared.log.warning('Processing returned no results')
+ shared.log.warning('Processing: no results')
results = []
return results
diff --git a/requirements.txt b/requirements.txt
index 3b1b14c7d..d9eba6958 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,7 +34,7 @@ pi-heif
# versioned
safetensors==0.4.5
tensordict==0.1.2
-peft==0.13.1
+peft==0.14.0
httpx==0.24.1
compel==2.0.3
torchsde==0.2.6
From fe1b0a8d351d188899856e0467e132d8356ab599 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 12 Dec 2024 13:58:58 -0500
Subject: [PATCH 107/162] add docs reference
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 +++-
README.md | 9 +++++----
wiki | 2 +-
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ecc7ef26..75979cc4c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-11
+## Update for 2024-12-12
### New models and integrations
@@ -36,6 +36,8 @@
### UI and workflow improvements
+- **Docs**:
+ - New documentation site!
- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
    this adds some perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
diff --git a/README.md b/README.md
index 1bb5eacd0..722041c93 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-
+
**Image Diffusion implementation with advanced features**
@@ -8,13 +8,14 @@
[![Discord](https://img.shields.io/discord/1101998836328697867?logo=Discord&svg=true)](https://discord.gg/VjvR2tabEX)
[![Sponsors](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/vladmandic)
-[Wiki](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.gg/VjvR2tabEX) | [Changelog](CHANGELOG.md)
+[Docs](https://vladmandic.github.io/sdnext-docs/) | [Wiki](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.gg/VjvR2tabEX) | [Changelog](CHANGELOG.md)
## Table of contents
+- [Documentation](https://vladmandic.github.io/sdnext-docs/)
- [SD.Next Features](#sdnext-features)
- [Model support](#model-support)
- [Platform support](#platform-support)
@@ -137,7 +138,7 @@ This should be fully cross-platform, but we'd really love to have additional con
### Credits
-- Main credit goes to [Automatic1111 WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) for original codebase
+- Main credit goes to [Automatic1111 WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) for the original codebase
- Additional credits are listed in [Credits](https://github.com/AUTOMATIC1111/stable-diffusion-webui/#credits)
- Licenses for modules are listed in [Licenses](html/licenses.html)
@@ -154,7 +155,7 @@ This should be fully cross-platform, but we'd really love to have additional con
### Docs
-If you're unsure how to use a feature, best place to start is [Wiki](https://github.com/vladmandic/automatic/wiki) and if its not there,
+If you're unsure how to use a feature, the best place to start is [Docs](https://vladmandic.github.io/sdnext-docs/) or [Wiki](https://github.com/vladmandic/automatic/wiki) and if it's not there,
check [ChangeLog](CHANGELOG.md) for when the feature was first introduced, as it will always have a short note on how to use it
### Sponsors
diff --git a/wiki b/wiki
index db828893c..8d63a0f04 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit db828893c803f1d5d0180cfe09689884bf27af2d
+Subproject commit 8d63a0f04687f24c4ef413f231970087f167175c
From 7a213fe69aaacf2f9c6f3ed64dc7d5a79be5918f Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 00:12:40 +0300
Subject: [PATCH 108/162] IPEX fix Flux
---
modules/intel/ipex/diffusers.py | 42 +++++++++++++++++++++------------
1 file changed, 27 insertions(+), 15 deletions(-)
diff --git a/modules/intel/ipex/diffusers.py b/modules/intel/ipex/diffusers.py
index f742fe5c0..2af602558 100644
--- a/modules/intel/ipex/diffusers.py
+++ b/modules/intel/ipex/diffusers.py
@@ -20,20 +20,31 @@ def fourier_filter(x_in, threshold, scale):
# fp64 error
-def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
- assert dim % 2 == 0, "The dimension must be even."
-
- scale = torch.arange(0, dim, 2, dtype=torch.float32, device=pos.device) / dim # force fp32 instead of fp64
- omega = 1.0 / (theta**scale)
-
- batch_size, seq_length = pos.shape
- out = torch.einsum("...n,d->...nd", pos, omega)
- cos_out = torch.cos(out)
- sin_out = torch.sin(out)
-
- stacked_out = torch.stack([cos_out, -sin_out, sin_out, cos_out], dim=-1)
- out = stacked_out.view(batch_size, -1, dim // 2, 2, 2)
- return out.float()
+class FluxPosEmbed(torch.nn.Module):
+ def __init__(self, theta: int, axes_dim):
+ super().__init__()
+ self.theta = theta
+ self.axes_dim = axes_dim
+
+ def forward(self, ids: torch.Tensor) -> torch.Tensor:
+ n_axes = ids.shape[-1]
+ cos_out = []
+ sin_out = []
+ pos = ids.float()
+ for i in range(n_axes):
+ cos, sin = diffusers.models.embeddings.get_1d_rotary_pos_embed(
+ self.axes_dim[i],
+ pos[:, i],
+ theta=self.theta,
+ repeat_interleave_real=True,
+ use_real=True,
+ freqs_dtype=torch.float32,
+ )
+ cos_out.append(cos)
+ sin_out.append(sin)
+ freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device)
+ freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device)
+ return freqs_cos, freqs_sin
@cache
@@ -337,4 +348,5 @@ def ipex_diffusers():
if not device_supports_fp64 or os.environ.get('IPEX_FORCE_ATTENTION_SLICE', None) is not None:
diffusers.models.attention_processor.SlicedAttnProcessor = SlicedAttnProcessor
diffusers.models.attention_processor.AttnProcessor = AttnProcessor
- diffusers.models.transformers.transformer_flux.rope = rope
+ if not device_supports_fp64:
+ diffusers.models.embeddings.FluxPosEmbed = FluxPosEmbed
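A rough usage sketch for the fp32 FluxPosEmbed replacement above, assuming the class is importable and the installed diffusers provides get_1d_rotary_pos_embed; theta, axes_dim and the ids shape are illustrative values:

import torch
emb = FluxPosEmbed(theta=10000, axes_dim=[16, 56, 56])
ids = torch.zeros(512, 3)                  # (sequence, n_axes) position ids
freqs_cos, freqs_sin = emb(ids)            # both computed in float32, avoiding the fp64 path
print(freqs_cos.shape, freqs_sin.shape)    # expected: torch.Size([512, 128]) each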
From 8a6e5617baa2b70bea0254683a4fb8f0759a7a06 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 22:05:13 +0300
Subject: [PATCH 109/162] Fix IPEX 2.5
---
modules/intel/ipex/__init__.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/intel/ipex/__init__.py b/modules/intel/ipex/__init__.py
index e1c476e7e..147aa2798 100644
--- a/modules/intel/ipex/__init__.py
+++ b/modules/intel/ipex/__init__.py
@@ -77,7 +77,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.cuda.warnings = torch.xpu.warnings
torch.cuda.classproperty = torch.xpu.classproperty
torch.UntypedStorage.cuda = torch.UntypedStorage.xpu
- if not ipex.__version__.startswith("2.3"):
+ if float(ipex.__version__[:3]) < 2.3:
torch.cuda._initialization_lock = torch.xpu.lazy_init._initialization_lock
torch.cuda._initialized = torch.xpu.lazy_init._initialized
torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork
@@ -111,7 +111,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.cuda.ComplexFloatStorage = torch.xpu.ComplexFloatStorage
torch.cuda.ComplexDoubleStorage = torch.xpu.ComplexDoubleStorage
- if not legacy or ipex.__version__.startswith("2.3"):
+ if not legacy or float(ipex.__version__[:3]) >= 2.3:
torch.cuda._initialization_lock = torch.xpu._initialization_lock
torch.cuda._initialized = torch.xpu._initialized
torch.cuda._is_in_bad_fork = torch.xpu._is_in_bad_fork
@@ -159,7 +159,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.xpu.amp.custom_fwd = torch.cuda.amp.custom_fwd
torch.xpu.amp.custom_bwd = torch.cuda.amp.custom_bwd
torch.cuda.amp = torch.xpu.amp
- if not ipex.__version__.startswith("2.3"):
+ if float(ipex.__version__[:3]) < 2.3:
torch.is_autocast_enabled = torch.xpu.is_autocast_xpu_enabled
torch.get_autocast_gpu_dtype = torch.xpu.get_autocast_xpu_dtype
@@ -178,7 +178,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.cuda.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler
# C
- if legacy and not ipex.__version__.startswith("2.3"):
+ if legacy and float(ipex.__version__[:3]) < 2.3:
torch._C._cuda_getCurrentRawStream = ipex._C._getCurrentRawStream
ipex._C._DeviceProperties.multi_processor_count = ipex._C._DeviceProperties.gpu_subslice_count
ipex._C._DeviceProperties.major = 12
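A standalone sketch of the float-prefix version check this patch switches to; the version strings are examples only:

for version in ('2.1.40+xpu', '2.3.110+xpu', '2.5.10+xpu'):
    print(version, float(version[:3]) < 2.3)   # True, False, False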
From 8ee5103ade8f16b86d3dd4d42f5361466c489c28 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 22:16:56 +0300
Subject: [PATCH 110/162] Update to IPEX 2.5.10+xpu
---
CHANGELOG.md | 1 +
installer.py | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 75979cc4c..7b05eac4f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -83,6 +83,7 @@
### Updates
- Additional Wiki content: Styles, Wildcards, etc.
+- **IPEX**: update to IPEX 2.5.10+xpu
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
diff --git a/installer.py b/installer.py
index 36254b66a..70c655ae3 100644
--- a/installer.py
+++ b/installer.py
@@ -635,13 +635,13 @@ def install_ipex(torch_command):
if os.environ.get("ClDeviceGlobalMemSizeAvailablePercent", None) is None:
os.environ.setdefault('ClDeviceGlobalMemSizeAvailablePercent', '100')
if "linux" in sys.platform:
- torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/')
+ torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.5.1+cxx11.abi torchvision==0.20.1+cxx11.abi intel-extension-for-pytorch==2.5.10+xpu oneccl_bind_pt==2.5.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/')
# torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/test/xpu') # test wheels are stable previews, significantly slower than IPEX
# os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.1 intel-extension-for-tensorflow[xpu]==2.15.0.1')
else:
torch_command = os.environ.get('TORCH_COMMAND', '--pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/xpu') # torchvision doesn't exist on test/stable branch for windows
install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.5.0'), 'openvino', ignore=True)
- install('nncf==2.7.0', 'nncf', ignore=True)
+ install('nncf==2.7.0', ignore=True, no_deps=True) # requires older pandas
install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True)
return torch_command
From 7d7bcb9684b5b9b535ee74ff9d19de2af3791ab8 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 23:01:53 +0300
Subject: [PATCH 111/162] Fix balanced offload with Cascade
---
modules/intel/ipex/diffusers.py | 2 +-
modules/sd_models.py | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/modules/intel/ipex/diffusers.py b/modules/intel/ipex/diffusers.py
index 2af602558..5bf5bbe39 100644
--- a/modules/intel/ipex/diffusers.py
+++ b/modules/intel/ipex/diffusers.py
@@ -1,7 +1,7 @@
import os
from functools import wraps, cache
import torch
-import diffusers #0.29.1 # pylint: disable=import-error
+import diffusers # pylint: disable=import-error
from diffusers.models.attention_processor import Attention
# pylint: disable=protected-access, missing-function-docstring, line-too-long
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c39c0263e..c5875c61f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -481,6 +481,8 @@ def apply_balanced_offload_to_module(pipe):
keys = [k for k in keys if k not in exclude and not k.startswith('_')]
for module_name, module_size in get_pipe_modules(pipe): # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
+ if module is None:
+ continue
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
From 814161cb210c623cb87f3fb9f87cd76b1a7e35ac Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 14 Dec 2024 17:29:51 -0500
Subject: [PATCH 112/162] major controlnet work, xinsir promax and tiling
support
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 17 +++++--
installer.py | 2 +-
modules/control/run.py | 74 ++++++++++++++++++++--------
modules/control/tile.py | 75 +++++++++++++++++++++++++++++
modules/control/unit.py | 38 +++++++++++++--
modules/control/units/controlnet.py | 32 ++++++------
modules/images_resize.py | 10 ++--
modules/sd_models.py | 5 ++
modules/ui_control.py | 10 ++--
scripts/regional_prompting.py | 1 +
10 files changed, 208 insertions(+), 56 deletions(-)
create mode 100644 modules/control/tile.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b05eac4f..c24484113 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-12
+## Update for 2024-12-13
### New models and integrations
@@ -33,11 +33,18 @@
    style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without input image in which case first prompt is used to establish baseline
    *note:* all prompts are processed as a single batch, so vram is the limiting factor
+- **ControlNet**
+ - improved support for `Union` controlnets with granular control mode type
+ - added support for latest [Xinsir ProMax](https://huggingface.co/xinsir/controlnet-union-sdxl-1.0) all-in-one controlnet
+ - added support for multiple **Tiling** controlnets, for example [Xinsir Tile](https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0)
+ *note*: when selecting tiles in control settings, you can also specify non-square ratios
+ in which case it will use context-aware image resize to maintain overall composition
### UI and workflow improvements
- **Docs**:
- New documentation site!
+ - Additional Wiki content: Styles, Wildcards, etc.
- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
    this adds some perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
@@ -82,7 +89,6 @@
### Updates
-- Additional Wiki content: Styles, Wildcards, etc.
- **IPEX**: update to IPEX 2.5.10+xpu
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
@@ -108,9 +114,10 @@
- simplify img2img/inpaint/sketch canvas handling
- fix prompt caching
- fix xyz grid skip final pass
-- fix sd upscale script
-- fix cogvideox-i2v
-- lora auto-apply tags remove duplicates
+- fix sd upscale script
+- fix cogvideox-i2v
+- lora auto-apply tags remove duplicates
+- control load model on-demand if not already loaded
## Update for 2024-11-21
diff --git a/installer.py b/installer.py
index 70c655ae3..18a8ad1f1 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '914a585be8187ec0ad92fab4f072c992f8c297cd' # diffusers commit hash
+ sha = '63243406ba5510c10d5cac931882918ceba926f9' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/control/run.py b/modules/control/run.py
index 6ae7fb20c..ac1ff233d 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -7,6 +7,7 @@
from modules.control import util # helper functions
from modules.control import unit # control units
from modules.control import processors # image preprocessors
+from modules.control import tile # tiling module
from modules.control.units import controlnet # lllyasviel ControlNet
from modules.control.units import xs # VisLearn ControlNet-XS
from modules.control.units import lite # Kohya ControlLLLite
@@ -83,6 +84,7 @@ def control_run(state: str = '',
u.adapter.load(u.model_name, force=False)
else:
u.controlnet.load(u.model_name, force=False)
+ u.update_choices(u.model_name)
if u.process is not None and u.process.override is None and u.override is not None:
u.process.override = u.override
@@ -149,6 +151,7 @@ def control_run(state: str = '',
outpath_grids=shared.opts.outdir_grids or shared.opts.outdir_control_grids,
)
p.state = state
+ p.is_tile = False
# processing.process_init(p)
resize_mode_before = resize_mode_before if resize_name_before != 'None' and inputs is not None and len(inputs) > 0 else 0
@@ -242,7 +245,7 @@ def control_run(state: str = '',
active_model.append(u.adapter)
active_strength.append(float(u.strength))
p.adapter_conditioning_factor = u.factor
- shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process={u.process.processor_id} model={u.adapter.model_id} strength={u.strength} factor={u.factor}')
+ shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process="{u.process.processor_id}" model="{u.adapter.model_id}" strength={u.strength} factor={u.factor}')
elif unit_type == 'controlnet' and u.controlnet.model is not None:
active_process.append(u.process)
active_model.append(u.controlnet)
@@ -250,8 +253,12 @@ def control_run(state: str = '',
active_start.append(float(u.start))
active_end.append(float(u.end))
p.guess_mode = u.guess
- p.control_mode = u.mode
- shared.log.debug(f'Control ControlNet unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
+ if isinstance(u.mode, str):
+ p.control_mode = u.choices.index(u.mode) if u.mode in u.choices else 0
+ p.is_tile = p.is_tile or 'tile' in u.mode.lower()
+ p.control_tile = u.tile
+ p.extra_generation_params["Control mode"] = u.mode
+ shared.log.debug(f'Control ControlNet unit: i={num_units} process="{u.process.processor_id}" model="{u.controlnet.model_id}" strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
elif unit_type == 'xs' and u.controlnet.model is not None:
active_process.append(u.process)
active_model.append(u.controlnet)
@@ -291,6 +298,7 @@ def control_run(state: str = '',
selected_models = None
elif len(active_model) == 1:
selected_models = active_model[0].model if active_model[0].model is not None else None
+ p.is_tile = p.is_tile or 'tile' in active_model[0].model_id.lower()
has_models = selected_models is not None
control_conditioning = active_strength[0] if len(active_strength) > 0 else 1 # strength or list[strength]
control_guidance_start = active_start[0] if len(active_start) > 0 else 0
@@ -305,29 +313,30 @@ def control_run(state: str = '',
has_models = any(u.enabled for u in units if u.type == 'reference')
else:
pass
+ p.is_tile = p.is_tile and has_models
def set_pipe():
global pipe, instance # pylint: disable=global-statement
pipe = None
if has_models:
p.ops.append('control')
- p.extra_generation_params["Control mode"] = unit_type # overriden later with pretty-print
+ p.extra_generation_params["Control type"] = unit_type # overriden later with pretty-print
+ p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
p.extra_generation_params["Control conditioning"] = control_conditioning if isinstance(control_conditioning, list) else [control_conditioning]
p.extra_generation_params['Control start'] = control_guidance_start if isinstance(control_guidance_start, list) else [control_guidance_start]
p.extra_generation_params['Control end'] = control_guidance_end if isinstance(control_guidance_end, list) else [control_guidance_end]
- p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
p.extra_generation_params["Control conditioning"] = ';'.join([str(c) for c in p.extra_generation_params["Control conditioning"]])
p.extra_generation_params['Control start'] = ';'.join([str(c) for c in p.extra_generation_params['Control start']])
p.extra_generation_params['Control end'] = ';'.join([str(c) for c in p.extra_generation_params['Control end']])
if unit_type == 't2i adapter' and has_models:
- p.extra_generation_params["Control mode"] = 'T2I-Adapter'
+ p.extra_generation_params["Control type"] = 'T2I-Adapter'
p.task_args['adapter_conditioning_scale'] = control_conditioning
instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
pipe = instance.pipeline
if inits is not None:
shared.log.warning('Control: T2I-Adapter does not support separate init image')
elif unit_type == 'controlnet' and has_models:
- p.extra_generation_params["Control mode"] = 'ControlNet'
+ p.extra_generation_params["Control type"] = 'ControlNet'
p.task_args['controlnet_conditioning_scale'] = control_conditioning
p.task_args['control_guidance_start'] = control_guidance_start
p.task_args['control_guidance_end'] = control_guidance_end
@@ -335,7 +344,7 @@ def set_pipe():
instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
pipe = instance.pipeline
elif unit_type == 'xs' and has_models:
- p.extra_generation_params["Control mode"] = 'ControlNet-XS'
+ p.extra_generation_params["Control type"] = 'ControlNet-XS'
p.controlnet_conditioning_scale = control_conditioning
p.control_guidance_start = control_guidance_start
p.control_guidance_end = control_guidance_end
@@ -344,14 +353,14 @@ def set_pipe():
if inits is not None:
shared.log.warning('Control: ControlNet-XS does not support separate init image')
elif unit_type == 'lite' and has_models:
- p.extra_generation_params["Control mode"] = 'ControlLLLite'
+ p.extra_generation_params["Control type"] = 'ControlLLLite'
p.controlnet_conditioning_scale = control_conditioning
instance = lite.ControlLLitePipeline(shared.sd_model)
pipe = instance.pipeline
if inits is not None:
shared.log.warning('Control: ControlLLLite does not support separate init image')
elif unit_type == 'reference' and has_models:
- p.extra_generation_params["Control mode"] = 'Reference'
+ p.extra_generation_params["Control type"] = 'Reference'
p.extra_generation_params["Control attention"] = p.attention
p.task_args['reference_attn'] = 'Attention' in p.attention
p.task_args['reference_adain'] = 'Adain' in p.attention
@@ -393,6 +402,8 @@ def set_pipe():
else:
original_pipeline = None
+ possible = sd_models.get_call(pipe).keys()
+
try:
with devices.inference_context():
if isinstance(inputs, str): # only video, the rest is a list
@@ -562,19 +573,29 @@ def set_pipe():
return [], '', '', 'Reference mode without image'
elif unit_type == 'controlnet' and has_models:
if input_type == 0: # Control only
- if shared.sd_model_type in ['f1', 'sd3'] and 'control_image' not in p.task_args:
- p.task_args['control_image'] = p.init_images # some controlnets mandate this
+ if 'control_image' in possible:
+ p.task_args['control_image'] = [p.init_images] if isinstance(p.init_images, Image.Image) else p.init_images
+ elif 'image' in possible:
+ p.task_args['image'] = [p.init_images] if isinstance(p.init_images, Image.Image) else p.init_images
+ if 'control_mode' in possible:
+ p.task_args['control_mode'] = p.control_mode
+ if 'strength' in possible:
p.task_args['strength'] = p.denoising_strength
+ p.init_images = None
elif input_type == 1: # Init image same as control
- p.task_args['control_image'] = p.init_images # switch image and control_image
- p.task_args['strength'] = p.denoising_strength
+ if 'control_image' in possible:
+ p.task_args['control_image'] = p.init_images # switch image and control_image
+ if 'strength' in possible:
+ p.task_args['strength'] = p.denoising_strength
p.init_images = [p.override or input_image] * len(active_model)
elif input_type == 2: # Separate init image
if init_image is None:
shared.log.warning('Control: separate init image not provided')
init_image = input_image
- p.task_args['control_image'] = p.init_images # switch image and control_image
- p.task_args['strength'] = p.denoising_strength
+ if 'control_image' in possible:
+ p.task_args['control_image'] = p.init_images # switch image and control_image
+ if 'strength' in possible:
+ p.task_args['strength'] = p.denoising_strength
p.init_images = [init_image] * len(active_model)
if is_generator:
@@ -607,11 +628,11 @@ def set_pipe():
p.task_args['strength'] = denoising_strength
p.image_mask = mask
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.INPAINTING) # only controlnet supports inpaint
- elif 'control_image' in p.task_args:
+ if hasattr(p, 'init_images') and p.init_images is not None:
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) # only controlnet supports img2img
else:
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE)
- if hasattr(p, 'init_images') and p.init_images is not None:
+ if hasattr(p, 'init_images') and p.init_images is not None and 'image' in possible:
p.task_args['image'] = p.init_images # need to set explicitly for txt2img
del p.init_images
if unit_type == 'lite':
@@ -624,9 +645,14 @@ def set_pipe():
# final check
if has_models:
- if unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] and p.task_args.get('image', None) is None and getattr(p, 'init_images', None) is None:
+ if unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] \
+ and p.task_args.get('image', None) is None \
+ and p.task_args.get('control_image', None) is None \
+ and getattr(p, 'init_images', None) is None \
+ and getattr(p, 'image', None) is None:
if is_generator:
- yield terminate(f'Mode={p.extra_generation_params.get("Control mode", None)} input image is none')
+ shared.log.debug(f'Control args: {p.task_args}')
+ yield terminate(f'Mode={p.extra_generation_params.get("Control type", None)} input image is none')
return [], '', '', 'Error: Input image is none'
# resize mask
@@ -656,11 +682,17 @@ def set_pipe():
script_runner.initialize_scripts(False)
p.script_args = script.init_default_script_args(script_runner)
- processed = p.scripts.run(p, *p.script_args)
+ # actual processing
+ if p.is_tile:
+ processed: processing.Processed = tile.run_tiling(p, input_image)
+ if processed is None:
+ processed = p.scripts.run(p, *p.script_args)
if processed is None:
processed: processing.Processed = processing.process_images(p) # run actual pipeline
else:
script_run = True
+
+ # postprocessing
processed = p.scripts.after(p, processed, *p.script_args)
output = None
if processed is not None:
diff --git a/modules/control/tile.py b/modules/control/tile.py
new file mode 100644
index 000000000..5dc104e47
--- /dev/null
+++ b/modules/control/tile.py
@@ -0,0 +1,75 @@
+from PIL import Image
+from modules import shared, processing, images, sd_models
+
+
+def get_tile(image: Image.Image, x: int, y: int, sx: int, sy: int) -> Image.Image:
+ return image.crop((
+ (x + 0) * image.width // sx,
+ (y + 0) * image.height // sy,
+ (x + 1) * image.width // sx,
+ (y + 1) * image.height // sy
+ ))
+
+
+def set_tile(image: Image.Image, x: int, y: int, tiled: Image.Image):
+ image.paste(tiled, (x * tiled.width, y * tiled.height))
+ return image
+
+
+def run_tiling(p: processing.StableDiffusionProcessing, input_image: Image.Image) -> processing.Processed:
+ # prepare images
+ sx, sy = p.control_tile.split('x')
+ sx = int(sx)
+ sy = int(sy)
+ if sx <= 0 or sy <= 0:
+ raise ValueError('Control: invalid tile size')
+ control_image = p.task_args.get('control_image', None) or p.task_args.get('image', None)
+ control_upscaled = None
+ if isinstance(control_image, list) and len(control_image) > 0:
+ control_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
+ im=control_image[0],
+ width=8 * int(sx * control_image[0].width) // 8,
+ height=8 * int(sy * control_image[0].height) // 8,
+ context='add with forward'
+ )
+ init_image = p.override or input_image
+ init_upscaled = None
+ if init_image is not None:
+ init_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
+ im=init_image,
+ width=8 * int(sx * init_image.width) // 8,
+ height=8 * int(sy * init_image.height) // 8,
+ context='add with forward'
+ )
+
+ # stop processing from restoring pipeline on each iteration
+ orig_restore_pipeline = getattr(shared.sd_model, 'restore_pipeline', None)
+ shared.sd_model.restore_pipeline = None
+
+ # run tiling
+ for x in range(sx):
+ for y in range(sy):
+ shared.log.info(f'Control Tile: tile={x+1}-{sx}/{y+1}-{sy} target={control_upscaled}')
+ shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
+ p.init_images = None
+ p.task_args['control_mode'] = p.control_mode
+ p.task_args['strength'] = p.denoising_strength
+ if init_upscaled is not None:
+ p.task_args['image'] = [get_tile(init_upscaled, x, y, sx, sy)]
+ if control_upscaled is not None:
+ p.task_args['control_image'] = [get_tile(control_upscaled, x, y, sx, sy)]
+ processed: processing.Processed = processing.process_images(p) # run actual pipeline
+ if processed is None or len(processed.images) == 0:
+ continue
+ control_upscaled = set_tile(control_upscaled, x, y, processed.images[0])
+
+ # post-process
+ p.width = control_upscaled.width
+ p.height = control_upscaled.height
+ processed.images = [control_upscaled]
+ processed.info = processed.infotext(p, 0)
+ processed.infotexts = [processed.info]
+ shared.sd_model.restore_pipeline = orig_restore_pipeline
+ if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
+ shared.sd_model.restore_pipeline()
+ return processed
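A minimal round-trip sketch of get_tile/set_tile above on a blank canvas, showing how run_tiling walks the grid; the 2x2 split and 1024px canvas are illustrative values:

from PIL import Image
sx, sy = 2, 2                                   # parsed from a control_tile string such as '2x2'
canvas = Image.new('RGB', (1024, 1024))
for x in range(sx):
    for y in range(sy):
        tile = get_tile(canvas, x, y, sx, sy)   # crop one 512x512 tile
        # ... the real code runs the img2img pipeline on this tile here ...
        canvas = set_tile(canvas, x, y, tile)   # paste the (processed) tile back in place
print(canvas.size)                              # (1024, 1024)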
diff --git a/modules/control/unit.py b/modules/control/unit.py
index 7dc5528a6..eeb729740 100644
--- a/modules/control/unit.py
+++ b/modules/control/unit.py
@@ -16,6 +16,22 @@
class Unit(): # mashup of gradio controls and mapping to actual implementation classes
+ def update_choices(self, model_id=None):
+ name = model_id or self.model_name
+ if name == 'InstantX Union':
+ self.choices = ['canny', 'tile', 'depth', 'blur', 'pose', 'gray', 'lq']
+ elif name == 'Shakker-Labs Union':
+ self.choices = ['canny', 'tile', 'depth', 'blur', 'pose', 'gray', 'lq']
+ elif name == 'Xinsir Union XL':
+ self.choices = ['openpose', 'depth', 'scribble', 'canny', 'normal']
+ elif name == 'Xinsir ProMax XL':
+ self.choices = ['openpose', 'depth', 'scribble', 'canny', 'normal', 'segment', 'tile', 'repaint']
+ else:
+ self.choices = ['default']
+
+ def __str__(self):
+ return f'Unit: type={self.type} enabled={self.enabled} strength={self.strength} start={self.start} end={self.end} mode={self.mode} tile={self.tile}'
+
def __init__(self,
# values
index: int = None,
@@ -38,6 +54,7 @@ def __init__(self,
control_start = None,
control_end = None,
control_mode = None,
+ control_tile = None,
result_txt = None,
extra_controls: list = [],
):
@@ -70,6 +87,10 @@ def __init__(self,
self.fidelity = 0.5
self.query_weight = 1.0
self.adain_weight = 1.0
+ # control mode
+ self.choices = ['default']
+ # control tile
+ self.tile = '1x1'
def reset():
if self.process is not None:
@@ -92,10 +113,16 @@ def control_change(start, end):
self.end = max(start, end)
def control_mode_change(mode):
- self.mode = mode - 1 if mode > 0 else None
+ self.mode = self.choices.index(mode) if mode is not None and mode in self.choices else 0
+
+ def control_tile_change(tile):
+ self.tile = tile
- def control_mode_show(model_id):
- return gr.update(visible='union' in model_id.lower())
+ def control_choices(model_id):
+ self.update_choices(model_id)
+ mode_visible = 'union' in model_id.lower() or 'promax' in model_id.lower()
+ tile_visible = 'union' in model_id.lower() or 'promax' in model_id.lower() or 'tile' in model_id.lower()
+ return [gr.update(visible=mode_visible, choices=self.choices), gr.update(visible=tile_visible)]
def adapter_extra(c1):
self.factor = c1
@@ -172,7 +199,7 @@ def set_image(image):
else:
self.controls.append(model_id)
model_id.change(fn=self.controlnet.load, inputs=[model_id], outputs=[result_txt], show_progress=True)
- model_id.change(fn=control_mode_show, inputs=[model_id], outputs=[control_mode], show_progress=False)
+ model_id.change(fn=control_choices, inputs=[model_id], outputs=[control_mode, control_tile], show_progress=False)
if extra_controls is not None and len(extra_controls) > 0:
extra_controls[0].change(fn=controlnet_extra, inputs=extra_controls)
elif self.type == 'xs':
@@ -231,3 +258,6 @@ def set_image(image):
if control_mode is not None:
self.controls.append(control_mode)
control_mode.change(fn=control_mode_change, inputs=[control_mode])
+ if control_tile is not None:
+ self.controls.append(control_tile)
+ control_tile.change(fn=control_tile_change, inputs=[control_tile])
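A small sketch of how the per-model choices from update_choices map the UI mode string to the integer control_mode used downstream; the list mirrors the Xinsir ProMax XL entry above:

choices = ['openpose', 'depth', 'scribble', 'canny', 'normal', 'segment', 'tile', 'repaint']
mode = 'tile'                                            # value picked in the CN Mode dropdown
control_mode = choices.index(mode) if mode in choices else 0
print(control_mode)                                      # 6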
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 3fa2d90eb..7361638c6 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -52,17 +52,20 @@
'Depth Mid XL': 'diffusers/controlnet-depth-sdxl-1.0-mid',
'OpenPose XL': 'thibaud/controlnet-openpose-sdxl-1.0/bin',
'Xinsir Union XL': 'xinsir/controlnet-union-sdxl-1.0',
+ 'Xinsir ProMax XL': 'brad-twinkl/controlnet-union-sdxl-1.0-promax',
'Xinsir OpenPose XL': 'xinsir/controlnet-openpose-sdxl-1.0',
'Xinsir Canny XL': 'xinsir/controlnet-canny-sdxl-1.0',
'Xinsir Depth XL': 'xinsir/controlnet-depth-sdxl-1.0',
'Xinsir Scribble XL': 'xinsir/controlnet-scribble-sdxl-1.0',
'Xinsir Anime Painter XL': 'xinsir/anime-painter',
+ 'Xinsir Tile XL': 'xinsir/controlnet-tile-sdxl-1.0',
'NoobAI Canny XL': 'Eugeoter/noob-sdxl-controlnet-canny',
'NoobAI Lineart Anime XL': 'Eugeoter/noob-sdxl-controlnet-lineart_anime',
'NoobAI Depth XL': 'Eugeoter/noob-sdxl-controlnet-depth',
'NoobAI Normal XL': 'Eugeoter/noob-sdxl-controlnet-normal',
'NoobAI SoftEdge XL': 'Eugeoter/noob-sdxl-controlnet-softedge_hed',
'NoobAI OpenPose XL': 'einar77/noob-openpose',
+ 'TTPlanet Tile Realistic XL': 'Yakonrus/SDXL_Controlnet_Tile_Realistic_v2',
# 'StabilityAI Canny R128': 'stabilityai/control-lora/control-LoRAs-rank128/control-lora-canny-rank128.safetensors',
# 'StabilityAI Depth R128': 'stabilityai/control-lora/control-LoRAs-rank128/control-lora-depth-rank128.safetensors',
# 'StabilityAI Recolor R128': 'stabilityai/control-lora/control-LoRAs-rank128/control-lora-recolor-rank128.safetensors',
@@ -166,30 +169,30 @@ def reset(self):
self.model_id = None
def get_class(self, model_id:str=''):
- import modules.shared
- if modules.shared.sd_model_type == 'sd':
+ from modules import shared
+ if shared.sd_model_type == 'none':
+ _load = shared.sd_model # trigger a load
+ if shared.sd_model_type == 'sd':
from diffusers import ControlNetModel as cls # pylint: disable=reimported
config = 'lllyasviel/control_v11p_sd15_canny'
- elif modules.shared.sd_model_type == 'sdxl':
- # TODO ControlNetUnion
- """
+ elif shared.sd_model_type == 'sdxl':
if 'union' in model_id.lower():
from diffusers import ControlNetUnionModel as cls
config = 'xinsir/controlnet-union-sdxl-1.0'
+ elif 'promax' in model_id.lower():
+ from diffusers import ControlNetUnionModel as cls
+ config = 'brad-twinkl/controlnet-union-sdxl-1.0-promax'
else:
from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
config = 'Eugeoter/noob-sdxl-controlnet-canny'
- """
- from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
- config = 'Eugeoter/noob-sdxl-controlnet-canny'
- elif modules.shared.sd_model_type == 'f1':
+ elif shared.sd_model_type == 'f1':
from diffusers import FluxControlNetModel as cls
config = 'InstantX/FLUX.1-dev-Controlnet-Union'
- elif modules.shared.sd_model_type == 'sd3':
+ elif shared.sd_model_type == 'sd3':
from diffusers import SD3ControlNetModel as cls
config = 'InstantX/SD3-Controlnet-Canny'
else:
- log.error(f'Control {what}: type={modules.shared.sd_model_type} unsupported model')
+ log.error(f'Control {what}: type={shared.sd_model_type} unsupported model')
return None, None
return cls, config
@@ -299,7 +302,7 @@ def __init__(self,
controlnet: Union[ControlNetModel, list[ControlNetModel]],
pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline, FluxPipeline, StableDiffusion3Pipeline],
dtype = None,
- p: StableDiffusionProcessingControl = None,
+ p: StableDiffusionProcessingControl = None, # pylint: disable=unused-argument
):
t0 = time.time()
self.orig_pipeline = pipeline
@@ -314,14 +317,11 @@ def __init__(self,
return
elif detect.is_sdxl(pipeline) and len(controlnets) > 0:
from diffusers import StableDiffusionXLControlNetPipeline, StableDiffusionXLControlNetUnionPipeline
- # TODO ControlNetUnion
- """
if controlnet.__class__.__name__ == 'ControlNetUnionModel':
cls = StableDiffusionXLControlNetUnionPipeline
+ controlnets = controlnets[0] # using only first one
else:
cls = StableDiffusionXLControlNetPipeline
- """
- cls = StableDiffusionXLControlNetPipeline
self.pipeline = cls(
vae=pipeline.vae,
text_encoder=pipeline.text_encoder,
diff --git a/modules/images_resize.py b/modules/images_resize.py
index d86ff6f22..5cf3e57e4 100644
--- a/modules/images_resize.py
+++ b/modules/images_resize.py
@@ -5,7 +5,7 @@
from modules import shared
-def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image', context=None):
+def resize_image(resize_mode: int, im: Image.Image, width: int, height: int, upscaler_name: str=None, output_type: str='image', context: str=None):
upscaler_name = upscaler_name or shared.opts.upscaler_for_img2img
def latent(im, w, h, upscaler):
@@ -79,18 +79,18 @@ def fill(im, color=None):
def context_aware(im, width, height, context):
import seam_carving # https://github.com/li-plus/seam-carving
- if 'forward' in context:
+ if 'forward' in context.lower():
energy_mode = "forward"
- elif 'backward' in context:
+ elif 'backward' in context.lower():
energy_mode = "backward"
else:
return im
- if 'Add' in context:
+ if 'add' in context.lower():
src_ratio = min(width / im.width, height / im.height)
src_w = int(im.width * src_ratio)
src_h = int(im.height * src_ratio)
src_image = resize(im, src_w, src_h)
- elif 'Remove' in context:
+ elif 'remove' in context.lower():
ratio = width / height
src_ratio = im.width / im.height
src_w = width if ratio > src_ratio else im.width * height // im.height
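A self-contained sketch of the now case-insensitive context matching; the context string mirrors the 'add with forward' value the tiling code passes to resize_image:

context = 'add with forward'
if 'forward' in context.lower():
    energy_mode = 'forward'
elif 'backward' in context.lower():
    energy_mode = 'backward'
else:
    energy_mode = None
grow = 'add' in context.lower()        # grow the canvas via seam insertion rather than shrink it
print(energy_mode, grow)               # forward True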
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c5875c61f..5d42e314b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -1057,6 +1057,11 @@ def get_signature(cls):
return signature.parameters
+def get_call(cls):
+ signature = inspect.signature(cls.__call__, follow_wrapped=True, eval_str=True)
+ return signature.parameters
+
+
def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionPipeline = None, force = False, args = {}):
"""
args:
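get_call is what lets control_run build `possible = sd_models.get_call(pipe).keys()` and only set task args the target pipeline actually accepts; here is a self-contained sketch with a stand-in pipeline class (the real helper also passes follow_wrapped/eval_str to inspect.signature):

import inspect

class DemoPipeline:                                      # illustrative stand-in, not a diffusers class
    def __call__(self, image=None, control_image=None, strength=0.3):
        return image

possible = inspect.signature(DemoPipeline.__call__).parameters.keys()
task_args = {}
if 'control_image' in possible:
    task_args['control_image'] = ['init image placeholder']
if 'strength' in possible:
    task_args['strength'] = 0.3
print(sorted(possible), task_args)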
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 59db12fc5..5a146a8fc 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -138,7 +138,7 @@ def create_ui(_blocks: gr.Blocks=None):
show_input = gr.Checkbox(label="Show input", value=True, elem_id="control_show_input")
show_preview = gr.Checkbox(label="Show preview", value=False, elem_id="control_show_preview")
with gr.Row():
- input_type = gr.Radio(label="Input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
+ input_type = gr.Radio(label="Control input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
with gr.Row():
denoising_strength = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Denoising strength', value=0.30, elem_id="control_input_denoising_strength")
@@ -251,9 +251,10 @@ def create_ui(_blocks: gr.Blocks=None):
model_id = gr.Dropdown(label="ControlNet", choices=controlnet.list_models(), value='None', elem_id=f'control_unit-{i}-model_name')
ui_common.create_refresh_button(model_id, controlnet.list_models, lambda: {"choices": controlnet.list_models(refresh=True)}, f'refresh_controlnet_models_{i}')
model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=2.0, step=0.01, value=1.0, elem_id=f'control_unit-{i}-strength')
- control_start = gr.Slider(label="Start", minimum=0.0, maximum=1.0, step=0.05, value=0, elem_id=f'control_unit-{i}-start')
- control_end = gr.Slider(label="End", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id=f'control_unit-{i}-end')
- control_mode = gr.Dropdown(label="CN Mode", choices=['', 'Canny', 'Tile', 'Depth', 'Blur', 'Pose', 'Gray', 'LQ'], value=0, type='index', visible=False, elem_id=f'control_unit-{i}-mode')
+ control_start = gr.Slider(label="CN Start", minimum=0.0, maximum=1.0, step=0.05, value=0, elem_id=f'control_unit-{i}-start')
+ control_end = gr.Slider(label="CN End", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id=f'control_unit-{i}-end')
+ control_mode = gr.Dropdown(label="CN Mode", choices=['default'], value='default', visible=False, elem_id=f'control_unit-{i}-mode')
+ control_tile = gr.Dropdown(label="CN Tiles", choices=['1x1', '1x2', '1x3', '1x4', '2x1', '2x1', '2x2', '2x3', '2x4', '3x1', '3x2', '3x3', '3x4', '4x1', '4x2', '4x3', '4x4'], value='1x1', visible=False, elem_id=f'control_unit-{i}-tile')
reset_btn = ui_components.ToolButton(value=ui_symbols.reset)
image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool'])
image_reuse= ui_components.ToolButton(value=ui_symbols.reuse)
@@ -278,6 +279,7 @@ def create_ui(_blocks: gr.Blocks=None):
control_start = control_start,
control_end = control_end,
control_mode = control_mode,
+ control_tile = control_tile,
extra_controls = extra_controls,
)
)
diff --git a/scripts/regional_prompting.py b/scripts/regional_prompting.py
index 08b84dd94..48309704e 100644
--- a/scripts/regional_prompting.py
+++ b/scripts/regional_prompting.py
@@ -82,6 +82,7 @@ def run(self, p: processing.StableDiffusionProcessing, mode, grid, power, thresh
}
# run pipeline
shared.log.debug(f'Regional: args={p.task_args}')
+ p.task_args['prompt'] = p.prompt
processed: processing.Processed = processing.process_images(p) # runs processing using main loop
# restore pipeline and params
From 3e8dec929730a9c2cb765d9051ba870cd3a19769 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 15 Dec 2024 12:40:54 -0500
Subject: [PATCH 113/162] add freescale
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 16 +-
modules/control/tile.py | 24 +-
modules/control/units/controlnet.py | 12 +-
modules/freescale/__init__.py | 4 +
modules/freescale/free_lunch_utils.py | 305 ++++
modules/freescale/freescale_pipeline.py | 1189 ++++++++++++++++
.../freescale/freescale_pipeline_img2img.py | 1245 +++++++++++++++++
modules/freescale/scale_attention.py | 367 +++++
modules/processing_diffusers.py | 27 +-
modules/sd_samplers_common.py | 4 +-
modules/sd_vae_taesd.py | 3 +
modules/shared.py | 1 +
modules/shared_state.py | 4 +-
modules/ui_control.py | 2 +-
scripts/freescale.py | 130 ++
15 files changed, 3303 insertions(+), 30 deletions(-)
create mode 100644 modules/freescale/__init__.py
create mode 100644 modules/freescale/free_lunch_utils.py
create mode 100644 modules/freescale/freescale_pipeline.py
create mode 100644 modules/freescale/freescale_pipeline_img2img.py
create mode 100644 modules/freescale/scale_attention.py
create mode 100644 scripts/freescale.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c24484113..35d3fcbe9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-13
+## Update for 2024-12-15
### New models and integrations
@@ -32,13 +32,19 @@
enter multiple prompts in prompt field separated by new line
    style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without input image in which case first prompt is used to establish baseline
- *note:* all prompts are processes as a single batch, so vram is limiting factor
+ *note:* all prompts are processed as a single batch, so vram is the limiting factor
+- [FreeScale](https://github.com/ali-vilab/FreeScale)
+ enable in scripts, compatible with sd-xl for text and img2img
+ run iterative generation of images at different scales to achieve better results
+ can render 4k sdxl images
+ *note*: disable live preview to avoid memory issues when generating large images
- **ControlNet**
- - improved support for `Union` controlnets with granular control mode type
+ - improved support for **Union** controlnets with granular control mode type
- added support for latest [Xinsir ProMax](https://huggingface.co/xinsir/controlnet-union-sdxl-1.0) all-in-one controlnet
- added support for multiple **Tiling** controlnets, for example [Xinsir Tile](https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0)
*note*: when selecting tiles in control settings, you can also specify non-square ratios
- in which case it will use context-aware image resize to maintain overall composition
+ in which case it will use context-aware image resize to maintain overall composition
+ *note*: available tiling options can be set in settings -> control
### UI and workflow improvements
@@ -118,6 +124,8 @@
- fix cogvideox-i2v
- lora auto-apply tags remove duplicates
- control load model on-demand if not already loaded
+- taesd limit render to 2024px
+- taesd downscale preview to 1024px max
## Update for 2024-11-21
diff --git a/modules/control/tile.py b/modules/control/tile.py
index 5dc104e47..de9df1131 100644
--- a/modules/control/tile.py
+++ b/modules/control/tile.py
@@ -1,3 +1,4 @@
+import time
from PIL import Image
from modules import shared, processing, images, sd_models
@@ -17,30 +18,25 @@ def set_tile(image: Image.Image, x: int, y: int, tiled: Image.Image):
def run_tiling(p: processing.StableDiffusionProcessing, input_image: Image.Image) -> processing.Processed:
+ t0 = time.time()
# prepare images
sx, sy = p.control_tile.split('x')
sx = int(sx)
sy = int(sy)
if sx <= 0 or sy <= 0:
- raise ValueError('Control: invalid tile size')
+ raise ValueError('Control Tile: invalid tile size')
control_image = p.task_args.get('control_image', None) or p.task_args.get('image', None)
control_upscaled = None
if isinstance(control_image, list) and len(control_image) > 0:
- control_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
- im=control_image[0],
- width=8 * int(sx * control_image[0].width) // 8,
- height=8 * int(sy * control_image[0].height) // 8,
- context='add with forward'
- )
+ w, h = 8 * int(sx * control_image[0].width) // 8, 8 * int(sy * control_image[0].height) // 8
+ control_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5, im=control_image[0], width=w, height=h, context='add with forward')
init_image = p.override or input_image
init_upscaled = None
if init_image is not None:
- init_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
- im=init_image,
- width=8 * int(sx * init_image.width) // 8,
- height=8 * int(sy * init_image.height) // 8,
- context='add with forward'
- )
+ w, h = 8 * int(sx * init_image.width) // 8, 8 * int(sy * init_image.height) // 8
+ init_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5, im=init_image, width=w, height=h, context='add with forward')
+ t1 = time.time()
+ shared.log.debug(f'Control Tile: scale={sx}x{sy} resize={"fixed" if sx==sy else "context"} control={control_upscaled} init={init_upscaled} time={t1-t0:.3f}')
# stop processing from restoring pipeline on each iteration
orig_restore_pipeline = getattr(shared.sd_model, 'restore_pipeline', None)
@@ -72,4 +68,6 @@ def run_tiling(p: processing.StableDiffusionProcessing, input_image: Image.Image
shared.sd_model.restore_pipeline = orig_restore_pipeline
if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
shared.sd_model.restore_pipeline()
+ t2 = time.time()
+ shared.log.debug(f'Control Tile: image={control_upscaled} time={t2-t0:.3f}')
return processed
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 7361638c6..c887aca8f 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -101,6 +101,14 @@
"Alimama Inpainting": 'alimama-creative/SD3-Controlnet-Inpainting',
"Alimama SoftEdge": 'alimama-creative/SD3-Controlnet-Softedge',
}
+variants = {
+ 'NoobAI Canny XL': 'fp16',
+ 'NoobAI Lineart Anime XL': 'fp16',
+ 'NoobAI Depth XL': 'fp16',
+ 'NoobAI Normal XL': 'fp16',
+ 'NoobAI SoftEdge XL': 'fp16',
+ 'TTPlanet Tile Realistic XL': 'fp16',
+}
models = {}
all_models = {}
all_models.update(predefined_sd15)
@@ -261,8 +269,8 @@ def load(self, model_id: str = None, force: bool = True) -> str:
if cls is None:
log.error(f'Control {what} model load failed: id="{model_id}" unknown base model')
return
- if 'Eugeoter' in model_path:
- kwargs['variant'] = 'fp16'
+ if variants.get(model_id, None) is not None:
+ kwargs['variant'] = variants[model_id]
self.model = cls.from_pretrained(model_path, **self.load_config, **kwargs)
if self.model is None:
return
diff --git a/modules/freescale/__init__.py b/modules/freescale/__init__.py
new file mode 100644
index 000000000..7b9c17f5d
--- /dev/null
+++ b/modules/freescale/__init__.py
@@ -0,0 +1,4 @@
+# Credits: https://github.com/ali-vilab/FreeScale
+
+from .freescale_pipeline import StableDiffusionXLFreeScale
+from .freescale_pipeline_img2img import StableDiffusionXLFreeScaleImg2Img
diff --git a/modules/freescale/free_lunch_utils.py b/modules/freescale/free_lunch_utils.py
new file mode 100644
index 000000000..be26b732a
--- /dev/null
+++ b/modules/freescale/free_lunch_utils.py
@@ -0,0 +1,305 @@
+from typing import Any, Dict, Optional, Tuple
+import torch
+import torch.fft as fft
+from diffusers.utils import is_torch_version
+
+""" Borrowed from https://github.com/ChenyangSi/FreeU/blob/main/demo/free_lunch_utils.py
+"""
+
+def isinstance_str(x: object, cls_name: str):
+ """
+ Checks whether x has any class *named* cls_name in its ancestry.
+ Doesn't require access to the class's implementation.
+
+ Useful for patching!
+ """
+
+ for _cls in x.__class__.__mro__:
+ if _cls.__name__ == cls_name:
+ return True
+
+ return False
+
+
+def Fourier_filter(x, threshold, scale):
+ dtype = x.dtype
+ x = x.type(torch.float32)
+ # FFT
+ x_freq = fft.fftn(x, dim=(-2, -1))
+ x_freq = fft.fftshift(x_freq, dim=(-2, -1))
+
+ B, C, H, W = x_freq.shape
+ mask = torch.ones((B, C, H, W)).cuda()
+
+ crow, ccol = H // 2, W //2
+ mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale
+ x_freq = x_freq * mask
+
+ # IFFT
+ x_freq = fft.ifftshift(x_freq, dim=(-2, -1))
+ x_filtered = fft.ifftn(x_freq, dim=(-2, -1)).real
+
+ x_filtered = x_filtered.type(dtype)
+ return x_filtered
+
+
+def register_upblock2d(model):
+ def up_forward(self):
+ def forward(hidden_states, res_hidden_states_tuple, temb=None, upsample_size=None):
+ for resnet in self.resnets:
+ # pop res hidden states
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ #print(f"in upblock2d, hidden states shape: {hidden_states.shape}")
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs)
+
+ return custom_forward
+
+ if is_torch_version(">=", "1.11.0"):
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb, use_reentrant=False
+ )
+ else:
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "UpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+
+
+def register_free_upblock2d(model, b1=1.2, b2=1.4, s1=0.9, s2=0.2):
+ def up_forward(self):
+ def forward(hidden_states, res_hidden_states_tuple, temb=None, upsample_size=None):
+ for resnet in self.resnets:
+ # pop res hidden states
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ #print(f"in free upblock2d, hidden states shape: {hidden_states.shape}")
+
+ # --------------- FreeU code -----------------------
+ # Only operate on the first two stages
+ if hidden_states.shape[1] == 1280:
+ hidden_states[:,:640] = hidden_states[:,:640] * self.b1
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s1)
+ if hidden_states.shape[1] == 640:
+ hidden_states[:,:320] = hidden_states[:,:320] * self.b2
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s2)
+ # ---------------------------------------------------------
+
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs)
+
+ return custom_forward
+
+ if is_torch_version(">=", "1.11.0"):
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb, use_reentrant=False
+ )
+ else:
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "UpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+ setattr(upsample_block, 'b1', b1)
+ setattr(upsample_block, 'b2', b2)
+ setattr(upsample_block, 's1', s1)
+ setattr(upsample_block, 's2', s2)
+
+
+def register_crossattn_upblock2d(model):
+ def up_forward(self):
+ def forward(
+ hidden_states: torch.FloatTensor,
+ res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
+ temb: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ upsample_size: Optional[int] = None,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ ):
+ for resnet, attn in zip(self.resnets, self.attentions):
+ # pop res hidden states
+                #print(f"in crossattn upblock2d, hidden states shape: {hidden_states.shape}")
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module, return_dict=None):
+ def custom_forward(*inputs):
+ if return_dict is not None:
+ return module(*inputs, return_dict=return_dict)
+ else:
+ return module(*inputs)
+
+ return custom_forward
+
+ ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet),
+ hidden_states,
+ temb,
+ **ckpt_kwargs,
+ )
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(attn, return_dict=False),
+ hidden_states,
+ encoder_hidden_states,
+ None, # timestep
+ None, # class_labels
+ cross_attention_kwargs,
+ attention_mask,
+ encoder_attention_mask,
+ **ckpt_kwargs,
+ )[0]
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ hidden_states = attn(
+ hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ cross_attention_kwargs=cross_attention_kwargs,
+ attention_mask=attention_mask,
+ encoder_attention_mask=encoder_attention_mask,
+ return_dict=False,
+ )[0]
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "CrossAttnUpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+
+
+def register_free_crossattn_upblock2d(model, b1=1.2, b2=1.4, s1=0.9, s2=0.2):
+ def up_forward(self):
+ def forward(
+ hidden_states: torch.FloatTensor,
+ res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
+ temb: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ upsample_size: Optional[int] = None,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ ):
+ for resnet, attn in zip(self.resnets, self.attentions):
+ # pop res hidden states
+                #print(f"in free crossattn upblock2d, hidden states shape: {hidden_states.shape}")
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+
+ # --------------- FreeU code -----------------------
+ # Only operate on the first two stages
+ if hidden_states.shape[1] == 1280:
+ hidden_states[:,:640] = hidden_states[:,:640] * self.b1
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s1)
+ if hidden_states.shape[1] == 640:
+ hidden_states[:,:320] = hidden_states[:,:320] * self.b2
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s2)
+ # ---------------------------------------------------------
+
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module, return_dict=None):
+ def custom_forward(*inputs):
+ if return_dict is not None:
+ return module(*inputs, return_dict=return_dict)
+ else:
+ return module(*inputs)
+
+ return custom_forward
+
+ ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet),
+ hidden_states,
+ temb,
+ **ckpt_kwargs,
+ )
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(attn, return_dict=False),
+ hidden_states,
+ encoder_hidden_states,
+ None, # timestep
+ None, # class_labels
+ cross_attention_kwargs,
+ attention_mask,
+ encoder_attention_mask,
+ **ckpt_kwargs,
+ )[0]
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ # hidden_states = attn(
+ # hidden_states,
+ # encoder_hidden_states=encoder_hidden_states,
+ # cross_attention_kwargs=cross_attention_kwargs,
+ # encoder_attention_mask=encoder_attention_mask,
+ # return_dict=False,
+ # )[0]
+ hidden_states = attn(
+ hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ cross_attention_kwargs=cross_attention_kwargs,
+ )[0]
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "CrossAttnUpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+ setattr(upsample_block, 'b1', b1)
+ setattr(upsample_block, 'b2', b2)
+ setattr(upsample_block, 's1', s1)
+ setattr(upsample_block, 's2', s2)
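+
+
+# Illustrative usage (a sketch, not executed here): given a loaded diffusers pipeline `pipe`
+# exposing `pipe.unet`, FreeU can be toggled by re-registering the up-block forwards, e.g.
+#   register_free_upblock2d(pipe, b1=1.2, b2=1.4, s1=0.9, s2=0.2)
+#   register_free_crossattn_upblock2d(pipe, b1=1.2, b2=1.4, s1=0.9, s2=0.2)
+# and restored to the stock behaviour with
+#   register_upblock2d(pipe)
+#   register_crossattn_upblock2d(pipe)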
diff --git a/modules/freescale/freescale_pipeline.py b/modules/freescale/freescale_pipeline.py
new file mode 100644
index 000000000..9b7a68b68
--- /dev/null
+++ b/modules/freescale/freescale_pipeline.py
@@ -0,0 +1,1189 @@
+from inspect import isfunction
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import inspect
+import os
+import random
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.models.attention_processor import AttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from diffusers.schedulers import KarrasDiffusionSchedulers
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils import is_accelerate_available, is_accelerate_version, logging, replace_example_docstring
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+from diffusers.models.attention import BasicTransformerBlock
+
+from .scale_attention import ori_forward, scale_forward
+
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+EXAMPLE_DOC_STRING = """
+ Examples:
+ ```py
+ >>> import torch
+ >>> from diffusers import StableDiffusionXLPipeline
+
+ >>> pipe = StableDiffusionXLPipeline.from_pretrained(
+ ... "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+ ... )
+ >>> pipe = pipe.to("cuda")
+
+ >>> prompt = "a photo of an astronaut riding a horse on mars"
+ >>> image = pipe(prompt).images[0]
+ ```
+"""
+
+def default(val, d):
+ if exists(val):
+ return val
+ return d() if isfunction(d) else d
+
+def exists(val):
+ return val is not None
+
+def extract_into_tensor(a, t, x_shape):
+ b, *_ = t.shape
+ out = a.gather(-1, t)
+ return out.reshape(b, *((1,) * (len(x_shape) - 1)))
+
+def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+ if schedule == "linear":
+ betas = (
+ torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
+ )
+ elif schedule == "cosine":
+ timesteps = (
+ torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
+ )
+ alphas = timesteps / (1 + cosine_s) * np.pi / 2
+ alphas = torch.cos(alphas).pow(2)
+ alphas = alphas / alphas[0]
+ betas = 1 - alphas[1:] / alphas[:-1]
+ betas = np.clip(betas, a_min=0, a_max=0.999)
+ elif schedule == "sqrt_linear":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
+ elif schedule == "sqrt":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
+ else:
+ raise ValueError(f"schedule '{schedule}' unknown.")
+ return betas.numpy()
+
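+# Module-level DDPM schedule constants: 1000 linear betas with the Stable Diffusion defaults
+# (linear_start=0.00085, linear_end=0.012); q_sample below reads these cumulative-product tables.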
+to_torch = partial(torch.tensor, dtype=torch.float16)
+betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.012)
+alphas = 1. - betas
+alphas_cumprod = np.cumprod(alphas, axis=0)
+sqrt_alphas_cumprod = to_torch(np.sqrt(alphas_cumprod))
+sqrt_one_minus_alphas_cumprod = to_torch(np.sqrt(1. - alphas_cumprod))
+
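+# Forward-diffusion helper: noises x_start to timestep t as
+# sqrt(alphas_cumprod[t]) * x_start + sqrt(1 - alphas_cumprod[t]) * noise, with noise scaled by init_noise_sigma.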
+def q_sample(x_start, t, init_noise_sigma=1.0, noise=None, device=None):
+ noise = default(noise, lambda: torch.randn_like(x_start)).to(device) * init_noise_sigma
+ return (extract_into_tensor(sqrt_alphas_cumprod.to(device), t, x_start.shape) * x_start +
+ extract_into_tensor(sqrt_one_minus_alphas_cumprod.to(device), t, x_start.shape) * noise)
+
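+# Splits a (height, width) canvas, in latent units, into overlapping sliding windows; when
+# random_jitter is set, interior windows are shifted by a small random offset so tile seams vary.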
+def get_views(height, width, h_window_size=128, w_window_size=128, h_window_stride=64, w_window_stride=64, vae_scale_factor=8):
+ height //= vae_scale_factor
+ width //= vae_scale_factor
+ num_blocks_height = int((height - h_window_size) / h_window_stride - 1e-6) + 2 if height > h_window_size else 1
+ num_blocks_width = int((width - w_window_size) / w_window_stride - 1e-6) + 2 if width > w_window_size else 1
+ total_num_blocks = int(num_blocks_height * num_blocks_width)
+ views = []
+ for i in range(total_num_blocks):
+ h_start = int((i // num_blocks_width) * h_window_stride)
+ h_end = h_start + h_window_size
+ w_start = int((i % num_blocks_width) * w_window_stride)
+ w_end = w_start + w_window_size
+
+ if h_end > height:
+ h_start = int(h_start + height - h_end)
+ h_end = int(height)
+ if w_end > width:
+ w_start = int(w_start + width - w_end)
+ w_end = int(width)
+ if h_start < 0:
+ h_end = int(h_end - h_start)
+ h_start = 0
+ if w_start < 0:
+ w_end = int(w_end - w_start)
+ w_start = 0
+
+ random_jitter = True
+ if random_jitter:
+ h_jitter_range = (h_window_size - h_window_stride) // 4
+ w_jitter_range = (w_window_size - w_window_stride) // 4
+ h_jitter = 0
+ w_jitter = 0
+
+ if (w_start != 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, w_jitter_range)
+ elif (w_start == 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, 0)
+ elif (w_start != 0) and (w_end == width):
+ w_jitter = random.randint(0, w_jitter_range)
+ if (h_start != 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, h_jitter_range)
+ elif (h_start == 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, 0)
+ elif (h_start != 0) and (h_end == height):
+ h_jitter = random.randint(0, h_jitter_range)
+ h_start += (h_jitter + h_jitter_range)
+ h_end += (h_jitter + h_jitter_range)
+ w_start += (w_jitter + w_jitter_range)
+ w_end += (w_jitter + w_jitter_range)
+
+ views.append((h_start, h_end, w_start, w_end))
+ return views
+
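+# Separable Gaussian blur over latents (depthwise conv, one kernel per channel); 5-D inputs are
+# flattened over the time axis before filtering and reshaped back.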
+def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
+ x_coord = torch.arange(kernel_size)
+ gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
+ gaussian_1d = gaussian_1d / gaussian_1d.sum()
+ gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
+ kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
+
+ return kernel
+
+def gaussian_filter(latents, kernel_size=3, sigma=1.0):
+ channels = latents.shape[1]
+ kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
+ if len(latents.shape) == 5:
+ b = latents.shape[0]
+ latents = rearrange(latents, 'b c t i j -> (b t) c i j')
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+ blurred_latents = rearrange(blurred_latents, '(b t) c i j -> b c t i j', b=b)
+ else:
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+
+ return blurred_latents
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
+def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
+ """
+ Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+ """
+ std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
+ std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
+ # rescale the results from guidance (fixes overexposure)
+ noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
+ # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
+ noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
+ return noise_cfg
+
+
+class StableDiffusionXLFreeScale(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin):
+ r"""
+ Pipeline for text-to-image generation using Stable Diffusion XL.
+
+ This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+ library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+    In addition, the pipeline inherits the following loading methods:
+ - *LoRA*: [`StableDiffusionXLPipeline.load_lora_weights`]
+ - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
+
+ as well as the following saving methods:
+ - *LoRA*: [`loaders.StableDiffusionXLPipeline.save_lora_weights`]
+
+ Args:
+ vae ([`AutoencoderKL`]):
+ Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+ text_encoder ([`CLIPTextModel`]):
+ Frozen text-encoder. Stable Diffusion XL uses the text portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
+ the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
+        text_encoder_2 ([`CLIPTextModelWithProjection`]):
+ Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
+ specifically the
+ [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
+ variant.
+ tokenizer (`CLIPTokenizer`):
+ Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ tokenizer_2 (`CLIPTokenizer`):
+ Second Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
+ """
+
+ def __init__(
+ self,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ text_encoder_2: CLIPTextModelWithProjection,
+ tokenizer: CLIPTokenizer,
+ tokenizer_2: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ scheduler: KarrasDiffusionSchedulers,
+ force_zeros_for_empty_prompt: bool = True,
+ ):
+ super().__init__()
+
+ self.register_modules(
+ vae=vae,
+ text_encoder=text_encoder,
+ text_encoder_2=text_encoder_2,
+ tokenizer=tokenizer,
+ tokenizer_2=tokenizer_2,
+ unet=unet,
+ scheduler=scheduler,
+ )
+ self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+ self.default_sample_size = self.unet.config.sample_size
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
+ def enable_vae_tiling(self):
+ r"""
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and allows
+        processing larger images.
+ """
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
+ def disable_vae_tiling(self):
+ r"""
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_tiling()
+
+ def enable_model_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+ to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains on the GPU until the next model runs. Memory savings are lower than with
+ `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+ """
+ if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+ from accelerate import cpu_offload_with_hook
+ else:
+ raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+ device = torch.device(f"cuda:{gpu_id}")
+
+ if self.device.type != "cpu":
+ self.to("cpu", silence_dtype_warnings=True)
+ torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
+
+ model_sequence = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+ model_sequence.extend([self.unet, self.vae])
+
+ hook = None
+ for cpu_offloaded_model in model_sequence:
+ _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
+
+ # We'll offload the last model manually.
+ self.final_offload_hook = hook
+
+ def encode_prompt(
+ self,
+ prompt: str,
+ prompt_2: Optional[str] = None,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
+ negative_prompt: Optional[str] = None,
+ negative_prompt_2: Optional[str] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ lora_scale: Optional[float] = None,
+ ):
+ r"""
+ Encodes the prompt into text encoder hidden states.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ prompt to be encoded
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ device: (`torch.device`):
+ torch device
+ num_images_per_prompt (`int`):
+ number of images that should be generated per prompt
+ do_classifier_free_guidance (`bool`):
+ whether to use classifier free guidance or not
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ lora_scale (`float`, *optional*):
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+ """
+ device = device or self._execution_device
+
+ # set lora scale so that monkey patched LoRA
+ # function of text encoder can correctly access it
+ if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+ self._lora_scale = lora_scale
+
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ # Define tokenizers and text encoders
+ tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
+ text_encoders = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+
+ if prompt_embeds is None:
+ prompt_2 = prompt_2 or prompt
+            # textual inversion: process multi-vector tokens if necessary
+ prompt_embeds_list = []
+ prompts = [prompt, prompt_2]
+ for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ prompt = self.maybe_convert_prompt(prompt, tokenizer)
+
+ text_inputs = tokenizer(
+ prompt,
+ padding="max_length",
+ max_length=tokenizer.model_max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ text_input_ids = text_inputs.input_ids
+ untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
+
+ if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
+ text_input_ids, untruncated_ids
+ ):
+ removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
+ logger.warning(
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
+ f" {tokenizer.model_max_length} tokens: {removed_text}"
+ )
+
+ prompt_embeds = text_encoder(
+ text_input_ids.to(device),
+ output_hidden_states=True,
+ )
+
+                # We are only interested in the pooled output of the final text encoder
+ pooled_prompt_embeds = prompt_embeds[0]
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+
+ prompt_embeds_list.append(prompt_embeds)
+
+ prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
+
+ # get unconditional embeddings for classifier free guidance
+ zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
+ negative_prompt_2 = negative_prompt_2 or negative_prompt
+
+ uncond_tokens: List[str]
+ if prompt is not None and type(prompt) is not type(negative_prompt):
+ raise TypeError(
+                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
+ f" {type(prompt)}."
+ )
+ elif isinstance(negative_prompt, str):
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+ elif batch_size != len(negative_prompt):
+ raise ValueError(
+ f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
+ f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
+ " the batch size of `prompt`."
+ )
+ else:
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+
+ negative_prompt_embeds_list = []
+ for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)
+
+ max_length = prompt_embeds.shape[1]
+ uncond_input = tokenizer(
+ negative_prompt,
+ padding="max_length",
+ max_length=max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ negative_prompt_embeds = text_encoder(
+ uncond_input.input_ids.to(device),
+ output_hidden_states=True,
+ )
+                # We are only interested in the pooled output of the final text encoder
+ negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+ negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
+
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
+
+ negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
+
+ prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ # duplicate text embeddings for each generation per prompt, using mps friendly method
+ prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
+
+ if do_classifier_free_guidance:
+ # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+ seq_len = negative_prompt_embeds.shape[1]
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+
+ pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+ if do_classifier_free_guidance:
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+
+ return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
+ def prepare_extra_step_kwargs(self, generator, eta):
+ # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
+ # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # and should be between [0, 1]
+
+ accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ extra_step_kwargs = {}
+ if accepts_eta:
+ extra_step_kwargs["eta"] = eta
+
+ # check if the scheduler accepts generator
+ accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ if accepts_generator:
+ extra_step_kwargs["generator"] = generator
+ return extra_step_kwargs
+
+ def check_inputs(
+ self,
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt=None,
+ negative_prompt_2=None,
+ prompt_embeds=None,
+ negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
+ ):
+ if height % 8 != 0 or width % 8 != 0:
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+
+ if (callback_steps is None) or (
+ callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+ ):
+ raise ValueError(
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+ f" {type(callback_steps)}."
+ )
+
+ if prompt is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt_2 is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt is None and prompt_embeds is None:
+ raise ValueError(
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
+ )
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
+
+ if negative_prompt is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
+ raise ValueError(
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
+ f" {negative_prompt_embeds.shape}."
+ )
+
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
+ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
+ shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+ if isinstance(generator, list) and len(generator) != batch_size:
+ raise ValueError(
+ f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+ f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+ )
+
+ if latents is None:
+ latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+ else:
+ latents = latents.to(device)
+
+ # scale the initial noise by the standard deviation required by the scheduler
+ latents = latents * self.scheduler.init_noise_sigma
+ return latents
+
+ def _get_add_time_ids(self, original_size, crops_coords_top_left, target_size, dtype):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+
+ passed_add_embed_dim = (
+ self.unet.config.addition_time_embed_dim * len(add_time_ids) + self.text_encoder_2.config.projection_dim
+ )
+ expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features
+
+ if expected_add_embed_dim != passed_add_embed_dim:
+ raise ValueError(
+ f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
+ )
+
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.StableDiffusionUpscalePipeline.upcast_vae
+ def upcast_vae(self):
+ dtype = self.vae.dtype
+ self.vae.to(dtype=torch.float32)
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
+ # if xformers or torch_2_0 is used attention block does not need
+ # to be in float32 which can save lots of memory
+ if use_torch_2_0_or_xformers:
+ self.vae.post_quant_conv.to(dtype)
+ self.vae.decoder.conv_in.to(dtype)
+ self.vae.decoder.mid_block.to(dtype)
+
+ @torch.no_grad()
+ @replace_example_docstring(EXAMPLE_DOC_STRING)
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback_steps: int = 1,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ resolutions_list: Optional[Union[int, List[int]]] = None,
+ restart_steps: Optional[Union[int, List[int]]] = None,
+ cosine_scale: float = 2.0,
+ dilate_tau: int = 35,
+ ):
+ r"""
+ Function invoked when calling the pipeline for generation.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
+                instead.
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The height in pixels of the generated image.
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The width in pixels of the generated image.
+ num_inference_steps (`int`, *optional*, defaults to 50):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ denoising_end (`float`, *optional*):
+ When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
+ completed before it is intentionally prematurely terminated. As a result, the returned sample will
+ still retain a substantial amount of noise as determined by the discrete timesteps selected by the
+ scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
+ "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
+ Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
+ guidance_scale (`float`, *optional*, defaults to 5.0):
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+                `guidance_scale` is defined as `w` of equation 2 of [Imagen
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
+ usually at the expense of lower image quality.
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
+ The number of images to generate per prompt.
+ eta (`float`, *optional*, defaults to 0.0):
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
+ [`schedulers.DDIMScheduler`], will be ignored for others.
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
+ to make generation deterministic.
+ latents (`torch.FloatTensor`, *optional*):
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+                tensor will be generated by sampling using the supplied random `generator`.
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. Choose between
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
+ of a plain tuple.
+ callback (`Callable`, *optional*):
+ A function that will be called every `callback_steps` steps during inference. The function will be
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ callback_steps (`int`, *optional*, defaults to 1):
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
+ called at every step.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+ `self.processor` in
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+            guidance_rescale (`float`, *optional*, defaults to 0.0):
+                Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
+                Flawed](https://arxiv.org/pdf/2305.08891.pdf). `guidance_rescale` is defined as `φ` in equation 16 of
+                [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Guidance rescale factor should fix overexposure when using zero terminal SNR.
+ original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+                `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
+ explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+ `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+ `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ For most cases, `target_size` should be set to the desired height and width of the generated image. If
+                not specified, it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
+ section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+
+ Examples:
+
+ Returns:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
+ `tuple`. When returning a tuple, the first element is a list with the generated images.
+ """
+
+ # 0. Default height and width to unet
+ if resolutions_list:
+ height, width = resolutions_list[0]
+ target_sizes = resolutions_list[1:]
+ if not restart_steps:
+ restart_steps = [15] * len(target_sizes)
+ else:
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ )
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = self._execution_device
+
+        # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
+        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf. `guidance_scale = 1`
+ # corresponds to doing no classifier free guidance.
+ do_classifier_free_guidance = guidance_scale > 1.0
+
+ # 3. Encode input prompt
+ text_encoder_lora_scale = (
+ cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+ )
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=text_encoder_lora_scale,
+ )
+
+ # 4. Prepare timesteps
+ self.scheduler.set_timesteps(num_inference_steps, device=device)
+
+ timesteps = self.scheduler.timesteps
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ add_time_ids = self._get_add_time_ids(
+ original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+ )
+
+ if do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 9.1 Apply denoising_end
+ if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
+
+ results_list = []
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = ori_forward.__get__(module, BasicTransformerBlock)
+
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+ results_list.append(latents)
+
+ for restart_index, target_size in enumerate(target_sizes):
+ restart_step = restart_steps[restart_index]
+ target_size_ = [target_size[0]//8, target_size[1]//8]
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = scale_forward.__get__(module, BasicTransformerBlock)
+ module.current_hw = target_size
+
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ latents = latents / self.vae.config.scaling_factor
+ image = self.vae.decode(latents, return_dict=False)[0]
+ image = torch.nn.functional.interpolate(
+ image,
+ size=target_size,
+ mode='bicubic',
+ )
+ latents = self.vae.encode(image).latent_dist.sample().to(self.vae.dtype)
+ latents = latents * self.vae.config.scaling_factor
+
+ noise_latents = []
+ noise = torch.randn_like(latents)
+ for timestep in self.scheduler.timesteps:
+ noise_latent = self.scheduler.add_noise(latents, noise, timestep.unsqueeze(0))
+ noise_latents.append(noise_latent)
+ latents = noise_latents[restart_step]
+
+ self.scheduler._step_index = 0
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+
+ if i < restart_step:
+ self.scheduler._step_index += 1
+ progress_bar.update()
+ continue
+
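+                    # Blend the current latents with the re-noised base latents: the cosine factor is ~1 at the
+                    # start of sampling (re-noised latents dominate) and decays to 0, with cosine_scale sharpening the decay.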
+ cosine_factor = 0.5 * (1 + torch.cos(torch.pi * (self.scheduler.config.num_train_timesteps - t) / self.scheduler.config.num_train_timesteps)).cpu()
+ c1 = cosine_factor ** cosine_scale
+ latents = latents * (1 - c1) + noise_latents[i] * c1
+
+                    dilate_coef = target_size[1] // 1024
+
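+                    # For the first dilate_tau steps, dilate selected down/mid conv layers by the upscale
+                    # ratio (target width // 1024) to widen their receptive field at the larger resolution.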
+ dilate_layers = [
+ # "down_blocks.1.resnets.0.conv1",
+ # "down_blocks.1.resnets.0.conv2",
+ # "down_blocks.1.resnets.1.conv1",
+ # "down_blocks.1.resnets.1.conv2",
+ "down_blocks.1.downsamplers.0.conv",
+ "down_blocks.2.resnets.0.conv1",
+ "down_blocks.2.resnets.0.conv2",
+ "down_blocks.2.resnets.1.conv1",
+ "down_blocks.2.resnets.1.conv2",
+ # "up_blocks.0.resnets.0.conv1",
+ # "up_blocks.0.resnets.0.conv2",
+ # "up_blocks.0.resnets.1.conv1",
+ # "up_blocks.0.resnets.1.conv2",
+ # "up_blocks.0.resnets.2.conv1",
+ # "up_blocks.0.resnets.2.conv2",
+ # "up_blocks.0.upsamplers.0.conv",
+ # "up_blocks.1.resnets.0.conv1",
+ # "up_blocks.1.resnets.0.conv2",
+ # "up_blocks.1.resnets.1.conv1",
+ # "up_blocks.1.resnets.1.conv2",
+ # "up_blocks.1.resnets.2.conv1",
+ # "up_blocks.1.resnets.2.conv2",
+ # "up_blocks.1.upsamplers.0.conv",
+ # "up_blocks.2.resnets.0.conv1",
+ # "up_blocks.2.resnets.0.conv2",
+ # "up_blocks.2.resnets.1.conv1",
+ # "up_blocks.2.resnets.1.conv2",
+ # "up_blocks.2.resnets.2.conv1",
+ # "up_blocks.2.resnets.2.conv2",
+ "mid_block.resnets.0.conv1",
+ "mid_block.resnets.0.conv2",
+ "mid_block.resnets.1.conv1",
+ "mid_block.resnets.1.conv2"
+ ]
+
+ for name, module in self.unet.named_modules():
+ if name in dilate_layers:
+ if i < dilate_tau:
+ module.dilation = (dilate_coef, dilate_coef)
+ module.padding = (dilate_coef, dilate_coef)
+ else:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
+
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents_dtype = latents.dtype
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+ if latents.dtype != latents_dtype:
+ if torch.backends.mps.is_available():
+                            # some platforms (e.g. Apple MPS) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
+ latents = latents.to(latents_dtype)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+
+ for name, module in self.unet.named_modules():
+ # if ('.conv' in name) and ('.conv_' not in name):
+ if name in dilate_layers:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
+
+ results_list.append(latents)
+
+ """
+ final_results = []
+ for latents in results_list:
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ if not output_type == "latent":
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+ else:
+ image = latents
+ return StableDiffusionXLPipelineOutput(images=image)
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ if not return_dict:
+ final_results += [(image,)]
+ else:
+ final_results += [StableDiffusionXLPipelineOutput(images=image)]
+
+ # Offload last model to CPU
+ if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
+ self.final_offload_hook.offload()
+
+ return final_results
+ """
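+        # NOTE: the decode/postprocess path above is left commented out, so the caller receives the
+        # raw latents for every resolution stage and is expected to decode them itself.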
+ return StableDiffusionXLPipelineOutput(images=results_list)
+
+    # Override to properly handle the loading and unloading of the additional text encoder.
+ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
+ # We could have accessed the unet config from `lora_state_dict()` too. We pass
+ # it here explicitly to be able to tell that it's coming from an SDXL
+ # pipeline.
+ state_dict, network_alphas = self.lora_state_dict(
+ pretrained_model_name_or_path_or_dict,
+ unet_config=self.unet.config,
+ **kwargs,
+ )
+ self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet)
+
+ text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k}
+ if len(text_encoder_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder,
+ prefix="text_encoder",
+ lora_scale=self.lora_scale,
+ )
+
+ text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k}
+ if len(text_encoder_2_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_2_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder_2,
+ prefix="text_encoder_2",
+ lora_scale=self.lora_scale,
+ )
+
+ @classmethod
+ def save_lora_weights(
+ self,
+ save_directory: Union[str, os.PathLike],
+ unet_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_2_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ is_main_process: bool = True,
+ weight_name: str = None,
+ save_function: Callable = None,
+ safe_serialization: bool = True,
+ ):
+ state_dict = {}
+
+ def pack_weights(layers, prefix):
+ layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers
+ layers_state_dict = {f"{prefix}.{module_name}": param for module_name, param in layers_weights.items()}
+ return layers_state_dict
+
+ state_dict.update(pack_weights(unet_lora_layers, "unet"))
+
+ if text_encoder_lora_layers and text_encoder_2_lora_layers:
+ state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder"))
+ state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))
+
+ self.write_lora_layers(
+ state_dict=state_dict,
+ save_directory=save_directory,
+ is_main_process=is_main_process,
+ weight_name=weight_name,
+ save_function=save_function,
+ safe_serialization=safe_serialization,
+ )
+
+ def _remove_text_encoder_monkey_patch(self):
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder)
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2)
diff --git a/modules/freescale/freescale_pipeline_img2img.py b/modules/freescale/freescale_pipeline_img2img.py
new file mode 100644
index 000000000..df4c3f0c1
--- /dev/null
+++ b/modules/freescale/freescale_pipeline_img2img.py
@@ -0,0 +1,1245 @@
+from inspect import isfunction
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import inspect
+import os
+import random
+
+from PIL import Image
+import numpy as np
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+import torchvision.transforms as transforms
+
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.models.attention_processor import AttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from diffusers.schedulers import KarrasDiffusionSchedulers
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils import is_accelerate_available, is_accelerate_version, logging, replace_example_docstring
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+from diffusers.models.attention import BasicTransformerBlock
+
+from .scale_attention import ori_forward, scale_forward
+
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+EXAMPLE_DOC_STRING = """
+ Examples:
+ ```py
+ >>> import torch
+ >>> from diffusers import StableDiffusionXLPipeline
+
+ >>> pipe = StableDiffusionXLPipeline.from_pretrained(
+ ... "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+ ... )
+ >>> pipe = pipe.to("cuda")
+
+ >>> prompt = "a photo of an astronaut riding a horse on mars"
+ >>> image = pipe(prompt).images[0]
+ ```
+"""
+
+def process_image_to_tensor(image):
+ image = image.convert("RGB")
+ # image = Image.open(image_path).convert("RGB")
+ transform = transforms.Compose(
+ [
+ # transforms.Resize((1024, 1024)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+ ]
+ )
+ image_tensor = transform(image)
+ return image_tensor
+
+def process_image_to_bitensor(image):
+ # image = Image.open(image_path).convert("L")
+ image = image.convert("L")
+ transform = transforms.ToTensor()
+ image_tensor = transform(image)
+ binary_tensor = torch.where(image_tensor != 0, torch.tensor(1.0), torch.tensor(0.0))
+ return binary_tensor
+
+def default(val, d):
+ if exists(val):
+ return val
+ return d() if isfunction(d) else d
+
+def exists(val):
+ return val is not None
+
+def extract_into_tensor(a, t, x_shape):
+ b, *_ = t.shape
+ out = a.gather(-1, t)
+ return out.reshape(b, *((1,) * (len(x_shape) - 1)))
+
+def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+ if schedule == "linear":
+ betas = (
+ torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
+ )
+ elif schedule == "cosine":
+ timesteps = (
+ torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
+ )
+ alphas = timesteps / (1 + cosine_s) * np.pi / 2
+ alphas = torch.cos(alphas).pow(2)
+ alphas = alphas / alphas[0]
+ betas = 1 - alphas[1:] / alphas[:-1]
+ betas = np.clip(betas, a_min=0, a_max=0.999)
+ elif schedule == "sqrt_linear":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
+ elif schedule == "sqrt":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
+ else:
+ raise ValueError(f"schedule '{schedule}' unknown.")
+ return betas.numpy()
+
+to_torch = partial(torch.tensor, dtype=torch.float16)
+betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.012)
+alphas = 1. - betas
+alphas_cumprod = np.cumprod(alphas, axis=0)
+sqrt_alphas_cumprod = to_torch(np.sqrt(alphas_cumprod))
+sqrt_one_minus_alphas_cumprod = to_torch(np.sqrt(1. - alphas_cumprod))
+
+def q_sample(x_start, t, init_noise_sigma = 1.0, noise=None, device=None):
+ noise = default(noise, lambda: torch.randn_like(x_start)).to(device) * init_noise_sigma
+ return (extract_into_tensor(sqrt_alphas_cumprod.to(device), t, x_start.shape) * x_start +
+ extract_into_tensor(sqrt_one_minus_alphas_cumprod.to(device), t, x_start.shape) * noise)
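+
+# Note: q_sample implements the standard DDPM forward-diffusion step,
+#   x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise,
+# using the cumulative-product tables precomputed above from the "linear" beta schedule;
+# `init_noise_sigma` only rescales the sampled noise. Illustrative call (shapes assumed):
+#   t = torch.full((latents.shape[0],), 999, device=latents.device, dtype=torch.long)
+#   noisy = q_sample(latents, t, device=latents.device)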
+
+def get_views(height, width, h_window_size=128, w_window_size=128, h_window_stride=64, w_window_stride=64, vae_scale_factor=8):
+ height //= vae_scale_factor
+ width //= vae_scale_factor
+ num_blocks_height = int((height - h_window_size) / h_window_stride - 1e-6) + 2 if height > h_window_size else 1
+ num_blocks_width = int((width - w_window_size) / w_window_stride - 1e-6) + 2 if width > w_window_size else 1
+ total_num_blocks = int(num_blocks_height * num_blocks_width)
+ views = []
+ for i in range(total_num_blocks):
+ h_start = int((i // num_blocks_width) * h_window_stride)
+ h_end = h_start + h_window_size
+ w_start = int((i % num_blocks_width) * w_window_stride)
+ w_end = w_start + w_window_size
+
+ if h_end > height:
+ h_start = int(h_start + height - h_end)
+ h_end = int(height)
+ if w_end > width:
+ w_start = int(w_start + width - w_end)
+ w_end = int(width)
+ if h_start < 0:
+ h_end = int(h_end - h_start)
+ h_start = 0
+ if w_start < 0:
+ w_end = int(w_end - w_start)
+ w_start = 0
+
+ random_jitter = True
+ if random_jitter:
+ h_jitter_range = (h_window_size - h_window_stride) // 4
+ w_jitter_range = (w_window_size - w_window_stride) // 4
+ h_jitter = 0
+ w_jitter = 0
+
+ if (w_start != 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, w_jitter_range)
+ elif (w_start == 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, 0)
+ elif (w_start != 0) and (w_end == width):
+ w_jitter = random.randint(0, w_jitter_range)
+ if (h_start != 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, h_jitter_range)
+ elif (h_start == 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, 0)
+ elif (h_start != 0) and (h_end == height):
+ h_jitter = random.randint(0, h_jitter_range)
+ h_start += (h_jitter + h_jitter_range)
+ h_end += (h_jitter + h_jitter_range)
+ w_start += (w_jitter + w_jitter_range)
+ w_end += (w_jitter + w_jitter_range)
+
+ views.append((h_start, h_end, w_start, w_end))
+ return views
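+
+# Worked example (illustrative numbers): for a 2048x2048-pixel target with vae_scale_factor=8 the
+# latent grid is 256x256; with the default 128-cell windows and stride 64 this yields
+# int((256 - 128) / 64 - 1e-6) + 2 = 3 blocks per axis, i.e. 9 overlapping views. Each view is then
+# randomly jittered by up to (128 - 64) // 4 = 16 latent cells (one-sided at the borders), and all
+# coordinates are shifted by the jitter range, so they index into a tensor padded by that amount.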
+
+def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
+ x_coord = torch.arange(kernel_size)
+ gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
+ gaussian_1d = gaussian_1d / gaussian_1d.sum()
+ gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
+ kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
+
+ return kernel
+
+def gaussian_filter(latents, kernel_size=3, sigma=1.0):
+ channels = latents.shape[1]
+ kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
+ if len(latents.shape) == 5:
+ b = latents.shape[0]
+ latents = rearrange(latents, 'b c t i j -> (b t) c i j')
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+ blurred_latents = rearrange(blurred_latents, '(b t) c i j -> b c t i j', b=b)
+ else:
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+
+ return blurred_latents
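+
+# Note: gaussian_kernel is a depthwise 2-D Gaussian built from the outer product of a normalized
+# 1-D Gaussian; gaussian_filter applies it with groups=channels so every latent channel is blurred
+# independently, flattening 5-D (b c t i j) inputs over time before the conv and restoring them after.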
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
+def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
+ """
+ Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+ """
+ std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
+ std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
+ # rescale the results from guidance (fixes overexposure)
+ noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
+ # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
+ noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
+ return noise_cfg
+
+
+class StableDiffusionXLFreeScaleImg2Img(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin):
+ r"""
+    Pipeline for FreeScale image-to-image generation using Stable Diffusion XL.
+
+ This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+ library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+ In addition the pipeline inherits the following loading methods:
+ - *LoRA*: [`StableDiffusionXLPipeline.load_lora_weights`]
+ - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
+
+ as well as the following saving methods:
+ - *LoRA*: [`loaders.StableDiffusionXLPipeline.save_lora_weights`]
+
+ Args:
+ vae ([`AutoencoderKL`]):
+ Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+ text_encoder ([`CLIPTextModel`]):
+ Frozen text-encoder. Stable Diffusion XL uses the text portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
+ the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
+        text_encoder_2 ([`CLIPTextModelWithProjection`]):
+ Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
+ specifically the
+ [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
+ variant.
+ tokenizer (`CLIPTokenizer`):
+ Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ tokenizer_2 (`CLIPTokenizer`):
+ Second Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
+ """
+
+ def __init__(
+ self,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ text_encoder_2: CLIPTextModelWithProjection,
+ tokenizer: CLIPTokenizer,
+ tokenizer_2: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ scheduler: KarrasDiffusionSchedulers,
+ force_zeros_for_empty_prompt: bool = True,
+ add_watermarker: Optional[bool] = None,
+ ):
+ super().__init__()
+
+ self.register_modules(
+ vae=vae,
+ text_encoder=text_encoder,
+ text_encoder_2=text_encoder_2,
+ tokenizer=tokenizer,
+ tokenizer_2=tokenizer_2,
+ unet=unet,
+ scheduler=scheduler,
+ )
+ self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+ self.default_sample_size = self.unet.config.sample_size
+
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
+ def enable_vae_tiling(self):
+ r"""
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+ compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
+ processing larger images.
+ """
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
+ def disable_vae_tiling(self):
+ r"""
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_tiling()
+
+ def enable_model_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+ to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains on the GPU until the next model runs. Memory savings are lower than with
+ `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+ """
+ if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+ from accelerate import cpu_offload_with_hook
+ else:
+ raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+ device = torch.device(f"cuda:{gpu_id}")
+
+ if self.device.type != "cpu":
+ self.to("cpu", silence_dtype_warnings=True)
+ torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
+
+ model_sequence = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+ model_sequence.extend([self.unet, self.vae])
+
+ hook = None
+ for cpu_offloaded_model in model_sequence:
+ _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
+
+ # We'll offload the last model manually.
+ self.final_offload_hook = hook
+
+ def encode_prompt(
+ self,
+ prompt: str,
+ prompt_2: Optional[str] = None,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
+ negative_prompt: Optional[str] = None,
+ negative_prompt_2: Optional[str] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ lora_scale: Optional[float] = None,
+ ):
+ r"""
+ Encodes the prompt into text encoder hidden states.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ prompt to be encoded
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ device: (`torch.device`):
+ torch device
+ num_images_per_prompt (`int`):
+ number of images that should be generated per prompt
+ do_classifier_free_guidance (`bool`):
+ whether to use classifier free guidance or not
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ lora_scale (`float`, *optional*):
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+ """
+ device = device or self._execution_device
+
+ # set lora scale so that monkey patched LoRA
+ # function of text encoder can correctly access it
+ if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+ self._lora_scale = lora_scale
+
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ # Define tokenizers and text encoders
+ tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
+ text_encoders = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+
+ if prompt_embeds is None:
+ prompt_2 = prompt_2 or prompt
+            # textual inversion: process multi-vector tokens if necessary
+ prompt_embeds_list = []
+ prompts = [prompt, prompt_2]
+ for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ prompt = self.maybe_convert_prompt(prompt, tokenizer)
+
+ text_inputs = tokenizer(
+ prompt,
+ padding="max_length",
+ max_length=tokenizer.model_max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ text_input_ids = text_inputs.input_ids
+ untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
+
+ if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
+ text_input_ids, untruncated_ids
+ ):
+ removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
+ logger.warning(
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
+ f" {tokenizer.model_max_length} tokens: {removed_text}"
+ )
+
+ prompt_embeds = text_encoder(
+ text_input_ids.to(device),
+ output_hidden_states=True,
+ )
+
+ # We are only ALWAYS interested in the pooled output of the final text encoder
+ pooled_prompt_embeds = prompt_embeds[0]
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+
+ prompt_embeds_list.append(prompt_embeds)
+
+ prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
+
+ # get unconditional embeddings for classifier free guidance
+ zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
+ negative_prompt_2 = negative_prompt_2 or negative_prompt
+
+ uncond_tokens: List[str]
+ if prompt is not None and type(prompt) is not type(negative_prompt):
+ raise TypeError(
+                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
+ f" {type(prompt)}."
+ )
+ elif isinstance(negative_prompt, str):
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+ elif batch_size != len(negative_prompt):
+ raise ValueError(
+ f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
+ f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
+ " the batch size of `prompt`."
+ )
+ else:
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+
+ negative_prompt_embeds_list = []
+ for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)
+
+ max_length = prompt_embeds.shape[1]
+ uncond_input = tokenizer(
+ negative_prompt,
+ padding="max_length",
+ max_length=max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ negative_prompt_embeds = text_encoder(
+ uncond_input.input_ids.to(device),
+ output_hidden_states=True,
+ )
+ # We are only ALWAYS interested in the pooled output of the final text encoder
+ negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+ negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
+
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
+
+ negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
+
+ prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ # duplicate text embeddings for each generation per prompt, using mps friendly method
+ prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
+
+ if do_classifier_free_guidance:
+ # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+ seq_len = negative_prompt_embeds.shape[1]
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+
+ pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+ if do_classifier_free_guidance:
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+
+ return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
+ def prepare_extra_step_kwargs(self, generator, eta):
+ # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
+ # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # and should be between [0, 1]
+
+ accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ extra_step_kwargs = {}
+ if accepts_eta:
+ extra_step_kwargs["eta"] = eta
+
+ # check if the scheduler accepts generator
+ accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ if accepts_generator:
+ extra_step_kwargs["generator"] = generator
+ return extra_step_kwargs
+
+ def check_inputs(
+ self,
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt=None,
+ negative_prompt_2=None,
+ prompt_embeds=None,
+ negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
+ ):
+ if height % 8 != 0 or width % 8 != 0:
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+
+ if (callback_steps is None) or (
+ callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+ ):
+ raise ValueError(
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+ f" {type(callback_steps)}."
+ )
+
+ if prompt is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt_2 is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt is None and prompt_embeds is None:
+ raise ValueError(
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
+ )
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
+
+ if negative_prompt is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
+ raise ValueError(
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
+ f" {negative_prompt_embeds.shape}."
+ )
+
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
+ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
+ shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+ if isinstance(generator, list) and len(generator) != batch_size:
+ raise ValueError(
+ f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+ f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+ )
+
+ if latents is None:
+ latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+ else:
+ latents = latents.to(device)
+
+ # scale the initial noise by the standard deviation required by the scheduler
+ latents = latents * self.scheduler.init_noise_sigma
+ return latents
+
+ def _get_add_time_ids(self, original_size, crops_coords_top_left, target_size, dtype):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+
+ passed_add_embed_dim = (
+ self.unet.config.addition_time_embed_dim * len(add_time_ids) + self.text_encoder_2.config.projection_dim
+ )
+ expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features
+
+ if expected_add_embed_dim != passed_add_embed_dim:
+ raise ValueError(
+ f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
+ )
+
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
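+
+    # Note: add_time_ids is SDXL's micro-conditioning vector
+    # [orig_h, orig_w, crop_top, crop_left, target_h, target_w]; the check above verifies that
+    # 6 * addition_time_embed_dim + text_encoder_2.config.projection_dim matches
+    # unet.add_embedding.linear_1.in_features before the ids are turned into a tensor.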
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.StableDiffusionUpscalePipeline.upcast_vae
+ def upcast_vae(self):
+ dtype = self.vae.dtype
+ self.vae.to(dtype=torch.float32)
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
+ # if xformers or torch_2_0 is used attention block does not need
+ # to be in float32 which can save lots of memory
+ if use_torch_2_0_or_xformers:
+ self.vae.post_quant_conv.to(dtype)
+ self.vae.decoder.conv_in.to(dtype)
+ self.vae.decoder.mid_block.to(dtype)
+
+ @torch.no_grad()
+ @replace_example_docstring(EXAMPLE_DOC_STRING)
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback_steps: int = 1,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ resolutions_list: Optional[Union[int, List[int]]] = None,
+ restart_steps: Optional[Union[int, List[int]]] = None,
+ cosine_scale: float = 2.0,
+ cosine_scale_bg: float = 1.0,
+ dilate_tau: int = 35,
+ img_path: Optional[str] = "",
+ mask_path: Optional[str] = "",
+ ):
+ r"""
+ Function invoked when calling the pipeline for generation.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
+                instead.
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The height in pixels of the generated image.
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The width in pixels of the generated image.
+ num_inference_steps (`int`, *optional*, defaults to 50):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ denoising_end (`float`, *optional*):
+ When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
+ completed before it is intentionally prematurely terminated. As a result, the returned sample will
+ still retain a substantial amount of noise as determined by the discrete timesteps selected by the
+ scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
+ "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
+ Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
+ guidance_scale (`float`, *optional*, defaults to 5.0):
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+ `guidance_scale` is defined as `w` of equation 2. of [Imagen
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
+ usually at the expense of lower image quality.
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
+ The number of images to generate per prompt.
+ eta (`float`, *optional*, defaults to 0.0):
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
+ [`schedulers.DDIMScheduler`], will be ignored for others.
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
+ to make generation deterministic.
+ latents (`torch.FloatTensor`, *optional*):
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+                tensor will be generated by sampling using the supplied random `generator`.
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. Choose between
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
+ of a plain tuple.
+ callback (`Callable`, *optional*):
+ A function that will be called every `callback_steps` steps during inference. The function will be
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ callback_steps (`int`, *optional*, defaults to 1):
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
+ called at every step.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+ `self.processor` in
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+            guidance_rescale (`float`, *optional*, defaults to 0.0):
+ Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
+ Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
+ [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Guidance rescale factor should fix overexposure when using zero terminal SNR.
+ original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+ `original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
+ explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+ `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+ `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ For most cases, `target_size` should be set to the desired height and width of the generated image. If
+ not specified it will default to `(width, height)`. Part of SDXL's micro-conditioning as explained in
+ section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+
+ Examples:
+
+ Returns:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
+ `tuple`. When returning a tuple, the first element is a list with the generated images.
+ """
+
+
+ # 0. Default height and width to unet
+ if resolutions_list:
+ height, width = resolutions_list[0]
+ target_sizes = resolutions_list[1:]
+ if not restart_steps:
+ restart_steps = [15] * len(target_sizes)
+ else:
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ )
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = self._execution_device
+
+        # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
+ # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # corresponds to doing no classifier free guidance.
+ do_classifier_free_guidance = guidance_scale > 1.0
+
+ # 3. Encode input prompt
+ text_encoder_lora_scale = (
+ cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+ )
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=text_encoder_lora_scale,
+ )
+
+ # 4. Prepare timesteps
+ self.scheduler.set_timesteps(num_inference_steps, device=device)
+
+ timesteps = self.scheduler.timesteps
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ add_time_ids = self._get_add_time_ids(
+ original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+ )
+
+ if do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 9.1 Apply denoising_end
+ if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
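+            # For example, with num_train_timesteps=1000 and denoising_end=0.7 the cutoff is
+            # round(1000 - 0.7 * 1000) = 300, so only timesteps >= 300 are kept and denoising stops early.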
+
+ results_list = []
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = ori_forward.__get__(module, BasicTransformerBlock)
+
+ if img_path != '':
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+ input_image = process_image_to_tensor(img_path).unsqueeze(0).to(dtype=self.vae.dtype, device=device)
+ latents = self.vae.encode(input_image).latent_dist.sample().to(self.vae.dtype)
+ latents = latents * self.vae.config.scaling_factor
+ else:
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+
+ results_list.append(latents)
+
+ if mask_path != '':
+ mask = process_image_to_bitensor(mask_path).unsqueeze(0)
+
+ for restart_index, target_size in enumerate(target_sizes):
+ restart_step = restart_steps[restart_index]
+ target_size_ = [target_size[0]//8, target_size[1]//8]
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = scale_forward.__get__(module, BasicTransformerBlock)
+ module.current_hw = target_size
+
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ latents = latents / self.vae.config.scaling_factor
+ image = self.vae.decode(latents, return_dict=False)[0]
+ image = torch.nn.functional.interpolate(
+ image,
+ size=target_size,
+ mode='bicubic',
+ )
+ latents = self.vae.encode(image).latent_dist.sample().to(self.vae.dtype)
+ latents = latents * self.vae.config.scaling_factor
+
+ if mask_path != '':
+ mask_ = torch.nn.functional.interpolate(
+ mask,
+ size=target_size_,
+ mode="nearest",
+ ).to(device)
+
+ noise_latents = []
+ noise = torch.randn_like(latents)
+ for timestep in self.scheduler.timesteps:
+ noise_latent = self.scheduler.add_noise(latents, noise, timestep.unsqueeze(0))
+ noise_latents.append(noise_latent)
+ latents = noise_latents[restart_step]
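+            # The upscaled, re-encoded latents are re-noised to the noise level of scheduler timestep
+            # index `restart_step`; the loop below skips its first `restart_step` iterations, so
+            # sampling effectively restarts from that intermediate step at the new resolution.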
+
+ self.scheduler._step_index = 0
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+
+ if i < restart_step:
+ self.scheduler._step_index += 1
+ progress_bar.update()
+ continue
+
+ cosine_factor = 0.5 * (1 + torch.cos(torch.pi * (self.scheduler.config.num_train_timesteps - t) / self.scheduler.config.num_train_timesteps)).cpu()
+ if mask_path != '':
+ c1 = (cosine_factor ** (mask_ * cosine_scale + (1-mask_) * cosine_scale_bg)).to(dtype=torch.float16)
+ else:
+ c1 = cosine_factor ** cosine_scale
+ latents = latents * (1 - c1) + noise_latents[i] * c1
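+                    # cosine_factor decays from ~1 at the first timestep to 0 at the last, so c1 blends
+                    # progressively less of the pre-noised latents back in; cosine_scale (or, with a mask,
+                    # cosine_scale for masked regions and cosine_scale_bg elsewhere) controls how fast that fades.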
+
+ dilate_coef=target_size[1]//1024
+
+ dilate_layers = [
+ # "down_blocks.1.resnets.0.conv1",
+ # "down_blocks.1.resnets.0.conv2",
+ # "down_blocks.1.resnets.1.conv1",
+ # "down_blocks.1.resnets.1.conv2",
+ "down_blocks.1.downsamplers.0.conv",
+ "down_blocks.2.resnets.0.conv1",
+ "down_blocks.2.resnets.0.conv2",
+ "down_blocks.2.resnets.1.conv1",
+ "down_blocks.2.resnets.1.conv2",
+ # "up_blocks.0.resnets.0.conv1",
+ # "up_blocks.0.resnets.0.conv2",
+ # "up_blocks.0.resnets.1.conv1",
+ # "up_blocks.0.resnets.1.conv2",
+ # "up_blocks.0.resnets.2.conv1",
+ # "up_blocks.0.resnets.2.conv2",
+ # "up_blocks.0.upsamplers.0.conv",
+ # "up_blocks.1.resnets.0.conv1",
+ # "up_blocks.1.resnets.0.conv2",
+ # "up_blocks.1.resnets.1.conv1",
+ # "up_blocks.1.resnets.1.conv2",
+ # "up_blocks.1.resnets.2.conv1",
+ # "up_blocks.1.resnets.2.conv2",
+ # "up_blocks.1.upsamplers.0.conv",
+ # "up_blocks.2.resnets.0.conv1",
+ # "up_blocks.2.resnets.0.conv2",
+ # "up_blocks.2.resnets.1.conv1",
+ # "up_blocks.2.resnets.1.conv2",
+ # "up_blocks.2.resnets.2.conv1",
+ # "up_blocks.2.resnets.2.conv2",
+ "mid_block.resnets.0.conv1",
+ "mid_block.resnets.0.conv2",
+ "mid_block.resnets.1.conv1",
+ "mid_block.resnets.1.conv2"
+ ]
+
+ for name, module in self.unet.named_modules():
+ if name in dilate_layers:
+ if i < dilate_tau:
+ module.dilation = (dilate_coef, dilate_coef)
+ module.padding = (dilate_coef, dilate_coef)
+ else:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
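+
+                    # For the first dilate_tau steps the listed down/mid conv layers are dilated by
+                    # target_width // 1024, enlarging their receptive field at the higher resolution;
+                    # afterwards (and again after the loop) dilation and padding are reset to (1, 1).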
+
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents_dtype = latents.dtype
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+ if latents.dtype != latents_dtype:
+ if torch.backends.mps.is_available():
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
+ latents = latents.to(latents_dtype)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+
+ for name, module in self.unet.named_modules():
+ # if ('.conv' in name) and ('.conv_' not in name):
+ if name in dilate_layers:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
+
+ results_list.append(latents)
+
+ """
+ final_results = []
+ for latents in results_list:
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ if not output_type == "latent":
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+ else:
+ image = latents
+ return StableDiffusionXLPipelineOutput(images=image)
+
+ # apply watermark if available
+ if self.watermark is not None:
+ image = self.watermark.apply_watermark(image)
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ if not return_dict:
+ final_results += [(image,)]
+ else:
+ final_results += [StableDiffusionXLPipelineOutput(images=image)]
+
+ # Offload last model to CPU
+ if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
+ self.final_offload_hook.offload()
+
+ return final_results
+ """
+ return StableDiffusionXLPipelineOutput(images=results_list)
+
+    # Override to properly handle the loading and unloading of the additional text encoder.
+ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
+ # We could have accessed the unet config from `lora_state_dict()` too. We pass
+ # it here explicitly to be able to tell that it's coming from an SDXL
+ # pipeline.
+ state_dict, network_alphas = self.lora_state_dict(
+ pretrained_model_name_or_path_or_dict,
+ unet_config=self.unet.config,
+ **kwargs,
+ )
+ self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet)
+
+ text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k}
+ if len(text_encoder_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder,
+ prefix="text_encoder",
+ lora_scale=self.lora_scale,
+ )
+
+ text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k}
+ if len(text_encoder_2_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_2_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder_2,
+ prefix="text_encoder_2",
+ lora_scale=self.lora_scale,
+ )
+
+ @classmethod
+ def save_lora_weights(
+ self,
+ save_directory: Union[str, os.PathLike],
+ unet_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_2_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ is_main_process: bool = True,
+ weight_name: str = None,
+ save_function: Callable = None,
+ safe_serialization: bool = True,
+ ):
+ state_dict = {}
+
+ def pack_weights(layers, prefix):
+ layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers
+ layers_state_dict = {f"{prefix}.{module_name}": param for module_name, param in layers_weights.items()}
+ return layers_state_dict
+
+ state_dict.update(pack_weights(unet_lora_layers, "unet"))
+
+ if text_encoder_lora_layers and text_encoder_2_lora_layers:
+ state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder"))
+ state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))
+
+ self.write_lora_layers(
+ state_dict=state_dict,
+ save_directory=save_directory,
+ is_main_process=is_main_process,
+ weight_name=weight_name,
+ save_function=save_function,
+ safe_serialization=safe_serialization,
+ )
+
+ def _remove_text_encoder_monkey_patch(self):
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder)
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2)
diff --git a/modules/freescale/scale_attention.py b/modules/freescale/scale_attention.py
new file mode 100644
index 000000000..9e83d5067
--- /dev/null
+++ b/modules/freescale/scale_attention.py
@@ -0,0 +1,367 @@
+from typing import Any, Dict, Optional
+import random
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+
+
+def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
+ x_coord = torch.arange(kernel_size)
+ gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
+ gaussian_1d = gaussian_1d / gaussian_1d.sum()
+ gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
+ kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
+
+ return kernel
+
+def gaussian_filter(latents, kernel_size=3, sigma=1.0):
+ channels = latents.shape[1]
+ kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+
+ return blurred_latents
+
+def get_views(height, width, h_window_size=128, w_window_size=128, scale_factor=8):
+ height = int(height)
+ width = int(width)
+ h_window_stride = h_window_size // 2
+ w_window_stride = w_window_size // 2
+ h_window_size = int(h_window_size / scale_factor)
+ w_window_size = int(w_window_size / scale_factor)
+ h_window_stride = int(h_window_stride / scale_factor)
+ w_window_stride = int(w_window_stride / scale_factor)
+ num_blocks_height = int((height - h_window_size) / h_window_stride - 1e-6) + 2 if height > h_window_size else 1
+ num_blocks_width = int((width - w_window_size) / w_window_stride - 1e-6) + 2 if width > w_window_size else 1
+ total_num_blocks = int(num_blocks_height * num_blocks_width)
+ views = []
+ for i in range(total_num_blocks):
+ h_start = int((i // num_blocks_width) * h_window_stride)
+ h_end = h_start + h_window_size
+ w_start = int((i % num_blocks_width) * w_window_stride)
+ w_end = w_start + w_window_size
+
+ if h_end > height:
+ h_start = int(h_start + height - h_end)
+ h_end = int(height)
+ if w_end > width:
+ w_start = int(w_start + width - w_end)
+ w_end = int(width)
+ if h_start < 0:
+ h_end = int(h_end - h_start)
+ h_start = 0
+ if w_start < 0:
+ w_end = int(w_end - w_start)
+ w_start = 0
+
+ random_jitter = True
+ if random_jitter:
+ h_jitter_range = h_window_size // 8
+ w_jitter_range = w_window_size // 8
+ h_jitter = 0
+ w_jitter = 0
+
+ if (w_start != 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, w_jitter_range)
+ elif (w_start == 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, 0)
+ elif (w_start != 0) and (w_end == width):
+ w_jitter = random.randint(0, w_jitter_range)
+ if (h_start != 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, h_jitter_range)
+ elif (h_start == 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, 0)
+ elif (h_start != 0) and (h_end == height):
+ h_jitter = random.randint(0, h_jitter_range)
+ h_start += (h_jitter + h_jitter_range)
+ h_end += (h_jitter + h_jitter_range)
+ w_start += (w_jitter + w_jitter_range)
+ w_end += (w_jitter + w_jitter_range)
+
+ views.append((h_start, h_end, w_start, w_end))
+ return views
+
+def scale_forward(
+ self,
+ hidden_states: torch.FloatTensor,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ timestep: Optional[torch.LongTensor] = None,
+ cross_attention_kwargs: Dict[str, Any] = None,
+ class_labels: Optional[torch.LongTensor] = None,
+):
+ # Notice that normalization is always applied before the real computation in the following blocks.
+ if self.current_hw:
+ current_scale_num_h, current_scale_num_w = max(self.current_hw[0] // 1024, 1), max(self.current_hw[1] // 1024, 1)
+ else:
+ current_scale_num_h, current_scale_num_w = 1, 1
+
+ # 0. Self-Attention
+ if self.use_ada_layer_norm:
+ norm_hidden_states = self.norm1(hidden_states, timestep)
+ elif self.use_ada_layer_norm_zero:
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
+ hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
+ )
+ else:
+ norm_hidden_states = self.norm1(hidden_states)
+
+ # 2. Prepare GLIGEN inputs
+ cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
+ gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
+
+ ratio_hw = current_scale_num_h / current_scale_num_w
+ latent_h = int((norm_hidden_states.shape[1] * ratio_hw) ** 0.5)
+ latent_w = int(latent_h / ratio_hw)
+ scale_factor = 128 * current_scale_num_h / latent_h
+ if ratio_hw > 1:
+ sub_h = 128
+ sub_w = int(128 / ratio_hw)
+ else:
+ sub_h = int(128 * ratio_hw)
+ sub_w = 128
+
+ h_jitter_range = int(sub_h / scale_factor // 8)
+ w_jitter_range = int(sub_w / scale_factor // 8)
+ views = get_views(latent_h, latent_w, sub_h, sub_w, scale_factor = scale_factor)
+
+ current_scale_num = max(current_scale_num_h, current_scale_num_w)
+ global_views = [[h, w] for h in range(current_scale_num_h) for w in range(current_scale_num_w)]
+
+ four_window = True
+ fourg_window = False
+
+ if four_window:
+ norm_hidden_states_ = rearrange(norm_hidden_states, 'bh (h w) d -> bh h w d', h = latent_h)
+ norm_hidden_states_ = F.pad(norm_hidden_states_, (0, 0, w_jitter_range, w_jitter_range, h_jitter_range, h_jitter_range), 'constant', 0)
+ value = torch.zeros_like(norm_hidden_states_)
+ count = torch.zeros_like(norm_hidden_states_)
+ for index, view in enumerate(views):
+ h_start, h_end, w_start, w_end = view
+ local_states = norm_hidden_states_[:, h_start:h_end, w_start:w_end, :]
+ local_states = rearrange(local_states, 'bh h w d -> bh (h w) d')
+ local_output = self.attn1(
+ local_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ local_output = rearrange(local_output, 'bh (h w) d -> bh h w d', h = int(sub_h / scale_factor))
+
+ value[:, h_start:h_end, w_start:w_end, :] += local_output * 1
+ count[:, h_start:h_end, w_start:w_end, :] += 1
+
+ value = value[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ count = count[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ attn_output = torch.where(count>0, value/count, value)
+
+ gaussian_local = gaussian_filter(attn_output, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
+ attn_output_global = self.attn1(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ attn_output_global = rearrange(attn_output_global, 'bh (h w) d -> bh h w d', h = latent_h)
+
+ gaussian_global = gaussian_filter(attn_output_global, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
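+        # Fuse: low-frequency structure from the windowed (local) attention plus high-frequency detail from the global attention.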
+ attn_output = gaussian_local + (attn_output_global - gaussian_global)
+ attn_output = rearrange(attn_output, 'bh h w d -> bh (h w) d')
+
+ elif fourg_window:
+ norm_hidden_states = rearrange(norm_hidden_states, 'bh (h w) d -> bh h w d', h = latent_h)
+ norm_hidden_states_ = F.pad(norm_hidden_states, (0, 0, w_jitter_range, w_jitter_range, h_jitter_range, h_jitter_range), 'constant', 0)
+ value = torch.zeros_like(norm_hidden_states_)
+ count = torch.zeros_like(norm_hidden_states_)
+ for index, view in enumerate(views):
+ h_start, h_end, w_start, w_end = view
+ local_states = norm_hidden_states_[:, h_start:h_end, w_start:w_end, :]
+ local_states = rearrange(local_states, 'bh h w d -> bh (h w) d')
+ local_output = self.attn1(
+ local_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ local_output = rearrange(local_output, 'bh (h w) d -> bh h w d', h = int(sub_h / scale_factor))
+
+ value[:, h_start:h_end, w_start:w_end, :] += local_output * 1
+ count[:, h_start:h_end, w_start:w_end, :] += 1
+
+ value = value[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ count = count[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ attn_output = torch.where(count>0, value/count, value)
+
+ gaussian_local = gaussian_filter(attn_output, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
+ value = torch.zeros_like(norm_hidden_states)
+ count = torch.zeros_like(norm_hidden_states)
+ for index, global_view in enumerate(global_views):
+ h, w = global_view
+ global_states = norm_hidden_states[:, h::current_scale_num_h, w::current_scale_num_w, :]
+ global_states = rearrange(global_states, 'bh h w d -> bh (h w) d')
+ global_output = self.attn1(
+ global_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ global_output = rearrange(global_output, 'bh (h w) d -> bh h w d', h = int(global_output.shape[1] ** 0.5))
+
+ value[:, h::current_scale_num_h, w::current_scale_num_w, :] += global_output * 1
+ count[:, h::current_scale_num_h, w::current_scale_num_w, :] += 1
+
+ attn_output_global = torch.where(count>0, value/count, value)
+
+ gaussian_global = gaussian_filter(attn_output_global, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
+ attn_output = gaussian_local + (attn_output_global - gaussian_global)
+ attn_output = rearrange(attn_output, 'bh h w d -> bh (h w) d')
+
+ else:
+ attn_output = self.attn1(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+
+ if self.use_ada_layer_norm_zero:
+ attn_output = gate_msa.unsqueeze(1) * attn_output
+ hidden_states = attn_output + hidden_states
+
+ # 2.5 GLIGEN Control
+ if gligen_kwargs is not None:
+ hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
+ # 2.5 ends
+
+ # 3. Cross-Attention
+ if self.attn2 is not None:
+ norm_hidden_states = (
+ self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
+ )
+ attn_output = self.attn2(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=encoder_attention_mask,
+ **cross_attention_kwargs,
+ )
+ hidden_states = attn_output + hidden_states
+
+ # 4. Feed-forward
+ norm_hidden_states = self.norm3(hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+
+ if self._chunk_size is not None:
+ # "feed_forward_chunk_size" can be used to save memory
+ if norm_hidden_states.shape[self._chunk_dim] % self._chunk_size != 0:
+ raise ValueError(
+ f"`hidden_states` dimension to be chunked: {norm_hidden_states.shape[self._chunk_dim]} has to be divisible by chunk size: {self._chunk_size}. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`."
+ )
+
+ num_chunks = norm_hidden_states.shape[self._chunk_dim] // self._chunk_size
+ ff_output = torch.cat(
+ [
+ self.ff(hid_slice)
+ for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)
+ ],
+ dim=self._chunk_dim,
+ )
+ else:
+ ff_output = self.ff(norm_hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
+
+ hidden_states = ff_output + hidden_states
+
+ return hidden_states
+
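+# Stock transformer block forward without FreeScale's windowed attention.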
+def ori_forward(
+ self,
+ hidden_states: torch.FloatTensor,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ timestep: Optional[torch.LongTensor] = None,
+ cross_attention_kwargs: Dict[str, Any] = None,
+ class_labels: Optional[torch.LongTensor] = None,
+):
+ # Notice that normalization is always applied before the real computation in the following blocks.
+ # 0. Self-Attention
+ if self.use_ada_layer_norm:
+ norm_hidden_states = self.norm1(hidden_states, timestep)
+ elif self.use_ada_layer_norm_zero:
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
+ hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
+ )
+ else:
+ norm_hidden_states = self.norm1(hidden_states)
+
+ # 2. Prepare GLIGEN inputs
+ cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
+ gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
+
+ attn_output = self.attn1(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+
+ if self.use_ada_layer_norm_zero:
+ attn_output = gate_msa.unsqueeze(1) * attn_output
+ hidden_states = attn_output + hidden_states
+
+ # 2.5 GLIGEN Control
+ if gligen_kwargs is not None:
+ hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
+ # 2.5 ends
+
+ # 3. Cross-Attention
+ if self.attn2 is not None:
+ norm_hidden_states = (
+ self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
+ )
+ attn_output = self.attn2(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=encoder_attention_mask,
+ **cross_attention_kwargs,
+ )
+ hidden_states = attn_output + hidden_states
+
+ # 4. Feed-forward
+ norm_hidden_states = self.norm3(hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+
+ if self._chunk_size is not None:
+ # "feed_forward_chunk_size" can be used to save memory
+ if norm_hidden_states.shape[self._chunk_dim] % self._chunk_size != 0:
+ raise ValueError(
+ f"`hidden_states` dimension to be chunked: {norm_hidden_states.shape[self._chunk_dim]} has to be divisible by chunk size: {self._chunk_size}. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`."
+ )
+
+ num_chunks = norm_hidden_states.shape[self._chunk_dim] // self._chunk_size
+ ff_output = torch.cat(
+ [
+ self.ff(hid_slice)
+ for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)
+ ],
+ dim=self._chunk_dim,
+ )
+ else:
+ ff_output = self.ff(norm_hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
+
+ hidden_states = ff_output + hidden_states
+
+ return hidden_states
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 3b6f228ba..adb047511 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -365,13 +365,26 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
else:
width = getattr(p, 'width', 0)
height = getattr(p, 'height', 0)
- results = processing_vae.vae_decode(
- latents = output.images,
- model = model,
- full_quality = p.full_quality,
- width = width,
- height = height,
- )
+ if isinstance(output.images, list):
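+        # Decode each latent batch in the list separately and flatten the decoded images into a single results list.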
+ results = []
+ for i in range(len(output.images)):
+ result_batch = processing_vae.vae_decode(
+ latents = output.images[i],
+ model = model,
+ full_quality = p.full_quality,
+ width = width,
+ height = height,
+ )
+ for result in list(result_batch):
+ results.append(result)
+ else:
+ results = processing_vae.vae_decode(
+ latents = output.images,
+ model = model,
+ full_quality = p.full_quality,
+ width = width,
+ height = height,
+ )
elif hasattr(output, 'images'):
results = output.images
else:
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index cd51043c7..723f7b181 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -40,7 +40,6 @@ def single_sample_to_image(sample, approximation=None):
if approximation is None:
warn_once('Unknown decode type')
approximation = 0
- # normal sample is [4,64,64]
try:
if sample.dtype == torch.bfloat16 and (approximation == 0 or approximation == 1):
sample = sample.to(torch.float16)
@@ -62,6 +61,9 @@ def single_sample_to_image(sample, approximation=None):
sample = sample * (5 / abs(sample_min))
"""
if approximation == 2: # TAESD
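+            # Downscale oversized latents so the longest side is at most 128 before the TAESD preview decode.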
+ if sample.shape[-1] > 128 or sample.shape[-2] > 128:
+ scale = 128 / max(sample.shape[-1], sample.shape[-2])
+ sample = torch.nn.functional.interpolate(sample.unsqueeze(0), scale_factor=[scale, scale], mode='bilinear', align_corners=False)[0]
x_sample = sd_vae_taesd.decode(sample)
x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range
elif shared.sd_model_type == 'sc' and approximation != 3:
diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py
index 4d213ad48..a1959817c 100644
--- a/modules/sd_vae_taesd.py
+++ b/modules/sd_vae_taesd.py
@@ -169,6 +169,9 @@ def decode(latents):
if vae is None:
return latents
try:
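+        # Skip TAESD decode for large latents (either dimension above 256) and return them unchanged.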
+ size = max(latents.shape[-1], latents.shape[-2])
+ if size > 256:
+ return latents
with devices.inference_context():
latents = latents.detach().clone().to(devices.device, dtype)
if len(latents.shape) == 3:
diff --git a/modules/shared.py b/modules/shared.py
index eaf4b361d..90dbe2647 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -873,6 +873,7 @@ def get_default_modes():
options_templates.update(options_section(('control', "Control Options"), {
"control_max_units": OptionInfo(4, "Maximum number of units", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "control_tiles": OptionInfo("1x1, 1x2, 1x3, 1x4, 2x1, 2x1, 2x2, 2x3, 2x4, 3x1, 3x2, 3x3, 3x4, 4x1, 4x2, 4x3, 4x4", "Tiling options"),
"control_move_processor": OptionInfo(False, "Processor move to CPU after use"),
"control_unload_processor": OptionInfo(False, "Processor unload after use"),
}))
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 3d3cb1ae6..a3312ec33 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -141,9 +141,9 @@ def set_current_image(self):
if self.job == 'VAE': # avoid generating preview while vae is running
return
from modules.shared import opts, cmd_opts
- if cmd_opts.lowvram or self.api:
+ if cmd_opts.lowvram or self.api or not opts.live_previews_enable or opts.show_progress_every_n_steps <= 0:
return
- if abs(self.sampling_step - self.current_image_sampling_step) >= opts.show_progress_every_n_steps and opts.live_previews_enable and opts.show_progress_every_n_steps > 0:
+ if abs(self.sampling_step - self.current_image_sampling_step) >= opts.show_progress_every_n_steps:
self.do_set_current_image()
def do_set_current_image(self):
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 5a146a8fc..7baf74d75 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -254,7 +254,7 @@ def create_ui(_blocks: gr.Blocks=None):
control_start = gr.Slider(label="CN Start", minimum=0.0, maximum=1.0, step=0.05, value=0, elem_id=f'control_unit-{i}-start')
control_end = gr.Slider(label="CN End", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id=f'control_unit-{i}-end')
control_mode = gr.Dropdown(label="CN Mode", choices=['default'], value='default', visible=False, elem_id=f'control_unit-{i}-mode')
- control_tile = gr.Dropdown(label="CN Tiles", choices=['1x1', '1x2', '1x3', '1x4', '2x1', '2x1', '2x2', '2x3', '2x4', '3x1', '3x2', '3x3', '3x4', '4x1', '4x2', '4x3', '4x4'], value='1x1', visible=False, elem_id=f'control_unit-{i}-tile')
+ control_tile = gr.Dropdown(label="CN Tiles", choices=[x.strip() for x in shared.opts.control_tiles.split(',') if 'x' in x], value='1x1', visible=False, elem_id=f'control_unit-{i}-tile')
reset_btn = ui_components.ToolButton(value=ui_symbols.reset)
image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool'])
image_reuse= ui_components.ToolButton(value=ui_symbols.reuse)
diff --git a/scripts/freescale.py b/scripts/freescale.py
new file mode 100644
index 000000000..672ceea41
--- /dev/null
+++ b/scripts/freescale.py
@@ -0,0 +1,130 @@
+import gradio as gr
+from modules import scripts, processing, shared, sd_models
+
+
+registered = False
+
+
+class Script(scripts.Script):
+ def __init__(self):
+ super().__init__()
+ self.orig_pipe = None
+ self.orig_slice = None
+ self.orig_tile = None
+ self.is_img2img = False
+
+ def title(self):
+ return 'FreeScale: Tuning-Free Scale Fusion'
+
+ def show(self, is_img2img):
+ self.is_img2img = is_img2img
+ return shared.native
+
+ def ui(self, _is_img2img): # ui elements
+ with gr.Row():
+            gr.HTML('  FreeScale: Tuning-Free Scale Fusion')
+ with gr.Row():
+ cosine_scale = gr.Slider(minimum=0.1, maximum=5.0, value=2.0, label='Cosine scale')
+ override_sampler = gr.Checkbox(value=True, label='Override sampler')
+ with gr.Row(visible=self.is_img2img):
+ cosine_scale_bg = gr.Slider(minimum=0.1, maximum=5.0, value=1.0, label='Cosine Background')
+ dilate_tau = gr.Slider(minimum=1, maximum=100, value=35, label='Dilate tau')
+ with gr.Row():
+ s1_enable = gr.Checkbox(value=True, label='1st Stage', interactive=False)
+ s1_scale = gr.Slider(minimum=1, maximum=8.0, value=1.0, label='Scale')
+ s1_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ with gr.Row():
+ s2_enable = gr.Checkbox(value=True, label='2nd Stage')
+ s2_scale = gr.Slider(minimum=1, maximum=8.0, value=2.0, label='Scale')
+ s2_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ with gr.Row():
+ s3_enable = gr.Checkbox(value=False, label='3rd Stage')
+ s3_scale = gr.Slider(minimum=1, maximum=8.0, value=3.0, label='Scale')
+ s3_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ with gr.Row():
+ s4_enable = gr.Checkbox(value=False, label='4th Stage')
+ s4_scale = gr.Slider(minimum=1, maximum=8.0, value=4.0, label='Scale')
+ s4_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ return [cosine_scale, override_sampler, cosine_scale_bg, dilate_tau, s1_enable, s1_scale, s1_restart, s2_enable, s2_scale, s2_restart, s3_enable, s3_scale, s3_restart, s4_enable, s4_scale, s4_restart]
+
+ def run(self, p: processing.StableDiffusionProcessing, cosine_scale, override_sampler, cosine_scale_bg, dilate_tau, s1_enable, s1_scale, s1_restart, s2_enable, s2_scale, s2_restart, s3_enable, s3_scale, s3_restart, s4_enable, s4_scale, s4_restart): # pylint: disable=arguments-differ
+ supported_model_list = ['sdxl']
+ if shared.sd_model_type not in supported_model_list:
+ shared.log.warning(f'FreeScale: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
+ return None
+
+ if self.is_img2img:
+ if p.init_images is None or len(p.init_images) == 0:
+ shared.log.warning('FreeScale: missing input image')
+ return None
+
+ from modules.freescale import StableDiffusionXLFreeScale, StableDiffusionXLFreeScaleImg2Img
+ self.orig_pipe = shared.sd_model
+ self.orig_slice = shared.opts.diffusers_vae_slicing
+ self.orig_tile = shared.opts.diffusers_vae_tiling
+
+ def scale(x):
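+            # Target [width, height] for scale factor x; falls back to the init image dimensions when width/height are 0.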
+ if (p.width == 0 or p.height == 0) and p.init_images is not None:
+ p.width, p.height = p.init_images[0].width, p.init_images[0].height
+ resolution = [int(8 * p.width * x // 8), int(8 * p.height * x // 8)]
+ return resolution
+
+ scales = []
+ resolutions_list = []
+ restart_steps = []
+ if s1_enable:
+ scales.append(s1_scale)
+ resolutions_list.append(scale(s1_scale))
+ restart_steps.append(int(p.steps * s1_restart))
+ if s2_enable and s2_scale > s1_scale:
+ scales.append(s2_scale)
+ resolutions_list.append(scale(s2_scale))
+ restart_steps.append(int(p.steps * s2_restart))
+ if s3_enable and s3_scale > s2_scale:
+ scales.append(s3_scale)
+ resolutions_list.append(scale(s3_scale))
+ restart_steps.append(int(p.steps * s3_restart))
+ if s4_enable and s4_scale > s3_scale:
+ scales.append(s4_scale)
+ resolutions_list.append(scale(s4_scale))
+ restart_steps.append(int(p.steps * s4_restart))
+
+ p.task_args['resolutions_list'] = resolutions_list
+ p.task_args['cosine_scale'] = cosine_scale
+ p.task_args['restart_steps'] = [min(max(1, step), p.steps-1) for step in restart_steps]
+ if self.is_img2img:
+ p.task_args['cosine_scale_bg'] = cosine_scale_bg
+ p.task_args['dilate_tau'] = dilate_tau
+ p.task_args['img_path'] = p.init_images[0]
+ p.init_images = None
+ if override_sampler:
+ p.sampler_name = 'Euler a'
+
+ if p.width < 1024 or p.height < 1024:
+ shared.log.error(f'FreeScale: width={p.width} height={p.height} minimum=1024')
+ return None
+
+ if not self.is_img2img:
+ shared.sd_model = sd_models.switch_pipe(StableDiffusionXLFreeScale, shared.sd_model)
+ else:
+ shared.sd_model = sd_models.switch_pipe(StableDiffusionXLFreeScaleImg2Img, shared.sd_model)
+ shared.sd_model.enable_vae_slicing()
+ shared.sd_model.enable_vae_tiling()
+
+ shared.log.info(f'FreeScale: mode={"txt" if not self.is_img2img else "img"} cosine={cosine_scale} bg={cosine_scale_bg} tau={dilate_tau} scales={scales} resolutions={resolutions_list} steps={restart_steps} sampler={p.sampler_name}')
+ resolutions = ','.join([f'{x[0]}x{x[1]}' for x in resolutions_list])
+ steps = ','.join([str(x) for x in restart_steps])
+ p.extra_generation_params["FreeScale"] = f'cosine {cosine_scale} resolutions {resolutions} steps {steps}'
+
+ def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, *args): # pylint: disable=arguments-differ, unused-argument
+ if self.orig_pipe is None:
+ return processed
+ # restore pipeline
+ if shared.sd_model_type == "sdxl":
+ shared.sd_model = self.orig_pipe
+ self.orig_pipe = None
+ if not self.orig_slice:
+ shared.sd_model.disable_vae_slicing()
+ if not self.orig_tile:
+ shared.sd_model.disable_vae_tiling()
+ return processed
From b1f1864099907e539b5fbea9bf765438afe2f327 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 15 Dec 2024 13:12:35 -0500
Subject: [PATCH 114/162] lint updates
Signed-off-by: Vladimir Mandic
---
.pylintrc | 1 +
.ruff.toml | 1 +
modules/control/run.py | 332 +++++++++++++++++++++-------------------
modules/lora/network.py | 1 -
modules/shared.py | 2 +-
5 files changed, 175 insertions(+), 162 deletions(-)
diff --git a/.pylintrc b/.pylintrc
index 59f1cb127..ad42ddd13 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -13,6 +13,7 @@ ignore-paths=/usr/lib/.*$,
modules/control/units,
modules/ctrlx,
modules/dml,
+ modules/freescale,
modules/ggml,
modules/hidiffusion,
modules/hijack,
diff --git a/.ruff.toml b/.ruff.toml
index c2d4a6f9a..4bab64260 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -7,6 +7,7 @@ exclude = [
"modules/consistory",
"modules/control/proc",
"modules/control/units",
+ "modules/freescale",
"modules/ggml",
"modules/hidiffusion",
"modules/hijack",
diff --git a/modules/control/run.py b/modules/control/run.py
index ac1ff233d..e780b9bae 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -45,6 +45,167 @@ def terminate(msg):
return msg
+def set_pipe(p, has_models, unit_type, selected_models, active_model, active_strength, control_conditioning, control_guidance_start, control_guidance_end, inits):
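+    # Build the pipeline for the active control units (T2I-Adapter, ControlNet, ControlNet-XS, ControlLLLite or Reference), or fall back to the base pipeline when no control models are active.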
+ global pipe, instance # pylint: disable=global-statement
+ pipe = None
+ if has_models:
+ p.ops.append('control')
+ p.extra_generation_params["Control type"] = unit_type # overriden later with pretty-print
+ p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
+ p.extra_generation_params["Control conditioning"] = control_conditioning if isinstance(control_conditioning, list) else [control_conditioning]
+ p.extra_generation_params['Control start'] = control_guidance_start if isinstance(control_guidance_start, list) else [control_guidance_start]
+ p.extra_generation_params['Control end'] = control_guidance_end if isinstance(control_guidance_end, list) else [control_guidance_end]
+ p.extra_generation_params["Control conditioning"] = ';'.join([str(c) for c in p.extra_generation_params["Control conditioning"]])
+ p.extra_generation_params['Control start'] = ';'.join([str(c) for c in p.extra_generation_params['Control start']])
+ p.extra_generation_params['Control end'] = ';'.join([str(c) for c in p.extra_generation_params['Control end']])
+ if unit_type == 't2i adapter' and has_models:
+ p.extra_generation_params["Control type"] = 'T2I-Adapter'
+ p.task_args['adapter_conditioning_scale'] = control_conditioning
+ instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+ shared.log.warning('Control: T2I-Adapter does not support separate init image')
+ elif unit_type == 'controlnet' and has_models:
+ p.extra_generation_params["Control type"] = 'ControlNet'
+ p.task_args['controlnet_conditioning_scale'] = control_conditioning
+ p.task_args['control_guidance_start'] = control_guidance_start
+ p.task_args['control_guidance_end'] = control_guidance_end
+ p.task_args['guess_mode'] = p.guess_mode
+ instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
+ pipe = instance.pipeline
+ elif unit_type == 'xs' and has_models:
+ p.extra_generation_params["Control type"] = 'ControlNet-XS'
+ p.controlnet_conditioning_scale = control_conditioning
+ p.control_guidance_start = control_guidance_start
+ p.control_guidance_end = control_guidance_end
+ instance = xs.ControlNetXSPipeline(selected_models, shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+ shared.log.warning('Control: ControlNet-XS does not support separate init image')
+ elif unit_type == 'lite' and has_models:
+ p.extra_generation_params["Control type"] = 'ControlLLLite'
+ p.controlnet_conditioning_scale = control_conditioning
+ instance = lite.ControlLLitePipeline(shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+ shared.log.warning('Control: ControlLLLite does not support separate init image')
+ elif unit_type == 'reference' and has_models:
+ p.extra_generation_params["Control type"] = 'Reference'
+ p.extra_generation_params["Control attention"] = p.attention
+ p.task_args['reference_attn'] = 'Attention' in p.attention
+ p.task_args['reference_adain'] = 'Adain' in p.attention
+ p.task_args['attention_auto_machine_weight'] = p.query_weight
+ p.task_args['gn_auto_machine_weight'] = p.adain_weight
+ p.task_args['style_fidelity'] = p.fidelity
+ instance = reference.ReferencePipeline(shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+            shared.log.warning('Control: Reference does not support separate init image')
+ else: # run in txt2img/img2img mode
+ if len(active_strength) > 0:
+ p.strength = active_strength[0]
+ pipe = shared.sd_model
+ instance = None
+ debug(f'Control: run type={unit_type} models={has_models} pipe={pipe.__class__.__name__ if pipe is not None else None}')
+ return pipe
+
+
+def check_active(p, unit_type, units):
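+    # Gather enabled units of the requested type: preprocessors, models, strengths and start/end steps; models of disabled units are offloaded to CPU.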
+ active_process: List[processors.Processor] = [] # all active preprocessors
+ active_model: List[Union[controlnet.ControlNet, xs.ControlNetXS, t2iadapter.Adapter]] = [] # all active models
+ active_strength: List[float] = [] # strength factors for all active models
+ active_start: List[float] = [] # start step for all active models
+ active_end: List[float] = [] # end step for all active models
+ num_units = 0
+ for u in units:
+ if u.type != unit_type:
+ continue
+ num_units += 1
+ debug(f'Control unit: i={num_units} type={u.type} enabled={u.enabled}')
+ if not u.enabled:
+ if u.controlnet is not None and u.controlnet.model is not None:
+ debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.cpu}')
+ sd_models.move_model(u.controlnet.model, devices.cpu)
+ continue
+ if u.controlnet is not None and u.controlnet.model is not None:
+ debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}')
+ sd_models.move_model(u.controlnet.model, devices.device)
+ if unit_type == 't2i adapter' and u.adapter.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.adapter)
+ active_strength.append(float(u.strength))
+ p.adapter_conditioning_factor = u.factor
+ shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process="{u.process.processor_id}" model="{u.adapter.model_id}" strength={u.strength} factor={u.factor}')
+ elif unit_type == 'controlnet' and u.controlnet.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.controlnet)
+ active_strength.append(float(u.strength))
+ active_start.append(float(u.start))
+ active_end.append(float(u.end))
+ p.guess_mode = u.guess
+ if isinstance(u.mode, str):
+ p.control_mode = u.choices.index(u.mode) if u.mode in u.choices else 0
+ p.is_tile = p.is_tile or 'tile' in u.mode.lower()
+ p.control_tile = u.tile
+ p.extra_generation_params["Control mode"] = u.mode
+ shared.log.debug(f'Control ControlNet unit: i={num_units} process="{u.process.processor_id}" model="{u.controlnet.model_id}" strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
+ elif unit_type == 'xs' and u.controlnet.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.controlnet)
+ active_strength.append(float(u.strength))
+ active_start.append(float(u.start))
+ active_end.append(float(u.end))
+ shared.log.debug(f'Control ControlNet-XS unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
+ elif unit_type == 'lite' and u.controlnet.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.controlnet)
+ active_strength.append(float(u.strength))
+ shared.log.debug(f'Control ControlLLite unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
+ elif unit_type == 'reference':
+ p.override = u.override
+ p.attention = u.attention
+ p.query_weight = float(u.query_weight)
+ p.adain_weight = float(u.adain_weight)
+ p.fidelity = u.fidelity
+ shared.log.debug('Control Reference unit')
+ else:
+ if u.process.processor_id is not None:
+ active_process.append(u.process)
+ shared.log.debug(f'Control process unit: i={num_units} process={u.process.processor_id}')
+ active_strength.append(float(u.strength))
+ debug(f'Control active: process={len(active_process)} model={len(active_model)}')
+ return active_process, active_model, active_strength, active_start, active_end
+
+
+def check_enabled(p, unit_type, units, active_model, active_strength, active_start, active_end):
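+    # Resolve selected model(s) plus conditioning strength and guidance start/end: scalars for a single active model, lists when multiple models are active.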
+ has_models = False
+ selected_models: List[Union[controlnet.ControlNetModel, xs.ControlNetXSModel, t2iadapter.AdapterModel]] = None
+ control_conditioning = None
+ control_guidance_start = None
+ control_guidance_end = None
+ if unit_type == 't2i adapter' or unit_type == 'controlnet' or unit_type == 'xs' or unit_type == 'lite':
+ if len(active_model) == 0:
+ selected_models = None
+ elif len(active_model) == 1:
+ selected_models = active_model[0].model if active_model[0].model is not None else None
+ p.is_tile = p.is_tile or 'tile' in active_model[0].model_id.lower()
+ has_models = selected_models is not None
+ control_conditioning = active_strength[0] if len(active_strength) > 0 else 1 # strength or list[strength]
+ control_guidance_start = active_start[0] if len(active_start) > 0 else 0
+ control_guidance_end = active_end[0] if len(active_end) > 0 else 1
+ else:
+ selected_models = [m.model for m in active_model if m.model is not None]
+ has_models = len(selected_models) > 0
+ control_conditioning = active_strength[0] if len(active_strength) == 1 else list(active_strength) # strength or list[strength]
+ control_guidance_start = active_start[0] if len(active_start) == 1 else list(active_start)
+ control_guidance_end = active_end[0] if len(active_end) == 1 else list(active_end)
+ elif unit_type == 'reference':
+ has_models = any(u.enabled for u in units if u.type == 'reference')
+ else:
+ pass
+ return has_models, selected_models, control_conditioning, control_guidance_start, control_guidance_end
+
+
def control_set(kwargs):
if kwargs:
global p_extra_args # pylint: disable=global-statement
@@ -88,16 +249,11 @@ def control_run(state: str = '',
if u.process is not None and u.process.override is None and u.override is not None:
u.process.override = u.override
- global instance, pipe, original_pipeline # pylint: disable=global-statement
+ global pipe, original_pipeline # pylint: disable=global-statement
debug(f'Control: type={unit_type} input={inputs} init={inits} type={input_type}')
if inputs is None or (type(inputs) is list and len(inputs) == 0):
inputs = [None]
output_images: List[Image.Image] = [] # output images
- active_process: List[processors.Processor] = [] # all active preprocessors
- active_model: List[Union[controlnet.ControlNet, xs.ControlNetXS, t2iadapter.Adapter]] = [] # all active models
- active_strength: List[float] = [] # strength factors for all active models
- active_start: List[float] = [] # start step for all active models
- active_end: List[float] = [] # end step for all active models
processed_image: Image.Image = None # last processed image
if mask is not None and input_type == 0:
input_type = 1 # inpaint always requires control_image
@@ -226,160 +382,17 @@ def control_run(state: str = '',
unit_type = unit_type.strip().lower() if unit_type is not None else ''
t0 = time.time()
- num_units = 0
- for u in units:
- if u.type != unit_type:
- continue
- num_units += 1
- debug(f'Control unit: i={num_units} type={u.type} enabled={u.enabled}')
- if not u.enabled:
- if u.controlnet is not None and u.controlnet.model is not None:
- debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.cpu}')
- sd_models.move_model(u.controlnet.model, devices.cpu)
- continue
- if u.controlnet is not None and u.controlnet.model is not None:
- debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}')
- sd_models.move_model(u.controlnet.model, devices.device)
- if unit_type == 't2i adapter' and u.adapter.model is not None:
- active_process.append(u.process)
- active_model.append(u.adapter)
- active_strength.append(float(u.strength))
- p.adapter_conditioning_factor = u.factor
- shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process="{u.process.processor_id}" model="{u.adapter.model_id}" strength={u.strength} factor={u.factor}')
- elif unit_type == 'controlnet' and u.controlnet.model is not None:
- active_process.append(u.process)
- active_model.append(u.controlnet)
- active_strength.append(float(u.strength))
- active_start.append(float(u.start))
- active_end.append(float(u.end))
- p.guess_mode = u.guess
- if isinstance(u.mode, str):
- p.control_mode = u.choices.index(u.mode) if u.mode in u.choices else 0
- p.is_tile = p.is_tile or 'tile' in u.mode.lower()
- p.control_tile = u.tile
- p.extra_generation_params["Control mode"] = u.mode
- shared.log.debug(f'Control ControlNet unit: i={num_units} process="{u.process.processor_id}" model="{u.controlnet.model_id}" strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
- elif unit_type == 'xs' and u.controlnet.model is not None:
- active_process.append(u.process)
- active_model.append(u.controlnet)
- active_strength.append(float(u.strength))
- active_start.append(float(u.start))
- active_end.append(float(u.end))
- shared.log.debug(f'Control ControlNet-XS unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
- elif unit_type == 'lite' and u.controlnet.model is not None:
- active_process.append(u.process)
- active_model.append(u.controlnet)
- active_strength.append(float(u.strength))
- shared.log.debug(f'Control ControlLLite unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
- elif unit_type == 'reference':
- p.override = u.override
- p.attention = u.attention
- p.query_weight = float(u.query_weight)
- p.adain_weight = float(u.adain_weight)
- p.fidelity = u.fidelity
- shared.log.debug('Control Reference unit')
- else:
- if u.process.processor_id is not None:
- active_process.append(u.process)
- shared.log.debug(f'Control process unit: i={num_units} process={u.process.processor_id}')
- active_strength.append(float(u.strength))
- debug(f'Control active: process={len(active_process)} model={len(active_model)}')
+
+ active_process, active_model, active_strength, active_start, active_end = check_active(p, unit_type, units)
+ has_models, selected_models, control_conditioning, control_guidance_start, control_guidance_end = check_enabled(p, unit_type, units, active_model, active_strength, active_start, active_end)
processed: processing.Processed = None
image_txt = ''
info_txt = []
- has_models = False
- selected_models: List[Union[controlnet.ControlNetModel, xs.ControlNetXSModel, t2iadapter.AdapterModel]] = None
- control_conditioning = None
- control_guidance_start = None
- control_guidance_end = None
- if unit_type == 't2i adapter' or unit_type == 'controlnet' or unit_type == 'xs' or unit_type == 'lite':
- if len(active_model) == 0:
- selected_models = None
- elif len(active_model) == 1:
- selected_models = active_model[0].model if active_model[0].model is not None else None
- p.is_tile = p.is_tile or 'tile' in active_model[0].model_id.lower()
- has_models = selected_models is not None
- control_conditioning = active_strength[0] if len(active_strength) > 0 else 1 # strength or list[strength]
- control_guidance_start = active_start[0] if len(active_start) > 0 else 0
- control_guidance_end = active_end[0] if len(active_end) > 0 else 1
- else:
- selected_models = [m.model for m in active_model if m.model is not None]
- has_models = len(selected_models) > 0
- control_conditioning = active_strength[0] if len(active_strength) == 1 else list(active_strength) # strength or list[strength]
- control_guidance_start = active_start[0] if len(active_start) == 1 else list(active_start)
- control_guidance_end = active_end[0] if len(active_end) == 1 else list(active_end)
- elif unit_type == 'reference':
- has_models = any(u.enabled for u in units if u.type == 'reference')
- else:
- pass
+
p.is_tile = p.is_tile and has_models
- def set_pipe():
- global pipe, instance # pylint: disable=global-statement
- pipe = None
- if has_models:
- p.ops.append('control')
- p.extra_generation_params["Control type"] = unit_type # overriden later with pretty-print
- p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
- p.extra_generation_params["Control conditioning"] = control_conditioning if isinstance(control_conditioning, list) else [control_conditioning]
- p.extra_generation_params['Control start'] = control_guidance_start if isinstance(control_guidance_start, list) else [control_guidance_start]
- p.extra_generation_params['Control end'] = control_guidance_end if isinstance(control_guidance_end, list) else [control_guidance_end]
- p.extra_generation_params["Control conditioning"] = ';'.join([str(c) for c in p.extra_generation_params["Control conditioning"]])
- p.extra_generation_params['Control start'] = ';'.join([str(c) for c in p.extra_generation_params['Control start']])
- p.extra_generation_params['Control end'] = ';'.join([str(c) for c in p.extra_generation_params['Control end']])
- if unit_type == 't2i adapter' and has_models:
- p.extra_generation_params["Control type"] = 'T2I-Adapter'
- p.task_args['adapter_conditioning_scale'] = control_conditioning
- instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: T2I-Adapter does not support separate init image')
- elif unit_type == 'controlnet' and has_models:
- p.extra_generation_params["Control type"] = 'ControlNet'
- p.task_args['controlnet_conditioning_scale'] = control_conditioning
- p.task_args['control_guidance_start'] = control_guidance_start
- p.task_args['control_guidance_end'] = control_guidance_end
- p.task_args['guess_mode'] = p.guess_mode
- instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
- pipe = instance.pipeline
- elif unit_type == 'xs' and has_models:
- p.extra_generation_params["Control type"] = 'ControlNet-XS'
- p.controlnet_conditioning_scale = control_conditioning
- p.control_guidance_start = control_guidance_start
- p.control_guidance_end = control_guidance_end
- instance = xs.ControlNetXSPipeline(selected_models, shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: ControlNet-XS does not support separate init image')
- elif unit_type == 'lite' and has_models:
- p.extra_generation_params["Control type"] = 'ControlLLLite'
- p.controlnet_conditioning_scale = control_conditioning
- instance = lite.ControlLLitePipeline(shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: ControlLLLite does not support separate init image')
- elif unit_type == 'reference' and has_models:
- p.extra_generation_params["Control type"] = 'Reference'
- p.extra_generation_params["Control attention"] = p.attention
- p.task_args['reference_attn'] = 'Attention' in p.attention
- p.task_args['reference_adain'] = 'Adain' in p.attention
- p.task_args['attention_auto_machine_weight'] = p.query_weight
- p.task_args['gn_auto_machine_weight'] = p.adain_weight
- p.task_args['style_fidelity'] = p.fidelity
- instance = reference.ReferencePipeline(shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: ControlNet-XS does not support separate init image')
- else: # run in txt2img/img2img mode
- if len(active_strength) > 0:
- p.strength = active_strength[0]
- pipe = shared.sd_model
- instance = None
- debug(f'Control: run type={unit_type} models={has_models} pipe={pipe.__class__.__name__ if pipe is not None else None}')
- return pipe
-
- pipe = set_pipe()
+ pipe = set_pipe(p, has_models, unit_type, selected_models, active_model, active_strength, control_conditioning, control_guidance_start, control_guidance_end, inits)
debug(f'Control pipeline: class={pipe.__class__.__name__} args={vars(p)}')
t1, t2, t3 = time.time(), 0, 0
status = True
@@ -433,7 +446,7 @@ def set_pipe():
while status:
if pipe is None: # pipe may have been reset externally
- pipe = set_pipe()
+ pipe = set_pipe(p, has_models, unit_type, selected_models, active_model, active_strength, control_conditioning, control_guidance_start, control_guidance_end, inits)
debug(f'Control pipeline reinit: class={pipe.__class__.__name__}')
processed_image = None
if frame is not None:
@@ -578,7 +591,7 @@ def set_pipe():
elif 'image' in possible:
p.task_args['image'] = [p.init_images] if isinstance(p.init_images, Image.Image) else p.init_images
if 'control_mode' in possible:
- p.task_args['control_mode'] = p.control_mode
+ p.task_args['control_mode'] = getattr(p, 'control_mode', None)
if 'strength' in possible:
p.task_args['strength'] = p.denoising_strength
p.init_images = None
@@ -638,8 +651,8 @@ def set_pipe():
if unit_type == 'lite':
p.init_image = [input_image]
instance.apply(selected_models, processed_image, control_conditioning)
- if p.control_mode is not None:
- p.task_args['control_mode'] = p.control_mode
+ if getattr(p, 'control_mode', None) is not None:
+ p.task_args['control_mode'] = getattr(p, 'control_mode', None)
if hasattr(p, 'init_images') and p.init_images is None: # delete empty
del p.init_images
@@ -770,5 +783,4 @@ def set_pipe():
html_txt = html_txt + infotext_to_html(info_txt[0])
if is_generator:
yield (output_images, blended_image, html_txt, output_filename)
- else:
- return (output_images, blended_image, html_txt, output_filename)
+ return (output_images, blended_image, html_txt, output_filename)
diff --git a/modules/lora/network.py b/modules/lora/network.py
index 8e6f87368..97feb76f1 100644
--- a/modules/lora/network.py
+++ b/modules/lora/network.py
@@ -2,7 +2,6 @@
import enum
from typing import Union
from collections import namedtuple
-
from modules import sd_models, hashes, shared
diff --git a/modules/shared.py b/modules/shared.py
index 90dbe2647..5e353b1b1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -490,7 +490,7 @@ def get_default_modes():
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_checkpoint_autodownload": OptionInfo(True, "Model auto-download on demand"),
- "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),
+ "stream_load": OptionInfo(False, "Model load using streams", gr.Checkbox),
"diffusers_eval": OptionInfo(True, "Force model eval", gr.Checkbox, {"visible": False }),
"diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"),
"disable_accelerate": OptionInfo(False, "Disable accelerate", gr.Checkbox, {"visible": False }),
From b89f41082daeda47f93a8d8013cdb95754a4e29e Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 15 Dec 2024 13:28:40 -0500
Subject: [PATCH 115/162] update requirements
Signed-off-by: Vladimir Mandic
---
TODO.md | 2 +-
requirements.txt | 8 ++++----
wiki | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/TODO.md b/TODO.md
index 63088d39f..996da5ad9 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,12 +17,12 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- SANA:
- LTX-Video:
- TorchAO:
-- ControlNetUnion/ControlNetPromax:
## Other
- IPAdapter negative:
- Control API enhance scripts compatibility
+- PixelSmith:
## Workaround in place
diff --git a/requirements.txt b/requirements.txt
index d9eba6958..572c4927b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -41,18 +41,18 @@ torchsde==0.2.6
antlr4-python3-runtime==4.9.3
requests==2.32.3
tqdm==4.66.5
-accelerate==1.1.1
+accelerate==1.2.1
opencv-contrib-python-headless==4.9.0.80
einops==0.4.1
gradio==3.43.2
-huggingface_hub==0.26.2
+huggingface_hub==0.26.5
numexpr==2.8.8
numpy==1.26.4
numba==0.59.1
protobuf==4.25.3
pytorch_lightning==1.9.4
-tokenizers==0.20.3
-transformers==4.46.3
+tokenizers==0.21.0
+transformers==4.47.0
urllib3==1.26.19
Pillow==10.4.0
timm==0.9.16
diff --git a/wiki b/wiki
index 8d63a0f04..a4eaad83c 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 8d63a0f04687f24c4ef413f231970087f167175c
+Subproject commit a4eaad83ccb8e82cb91fde4c038877616ed012d6
From ab07788ab5bbd5556ad310482ede102582a86f59 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 16 Dec 2024 11:30:15 -0500
Subject: [PATCH 116/162] add sana
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 13 ++-
html/reference.json | 13 +++
installer.py | 2 +-
...rge-Model--Sana_1600M_1024px_diffusers.jpg | Bin 0 -> 53061 bytes
modules/model_flux.py | 6 +-
modules/model_omnigen.py | 3 +-
modules/model_sana.py | 25 ++++
modules/model_te.py | 3 +-
modules/modeldata.py | 83 +++++++------
modules/pag/__init__.py | 7 +-
modules/processing_vae.py | 3 +-
modules/schedulers/scheduler_dpm_flowmatch.py | 3 +-
modules/sd_detect.py | 2 +
modules/sd_models.py | 3 +
modules/sd_samplers.py | 9 +-
modules/sd_samplers_common.py | 1 +
modules/sd_samplers_diffusers.py | 109 +++++++++---------
modules/shared_items.py | 21 ++--
modules/ui_sections.py | 4 +-
19 files changed, 188 insertions(+), 122 deletions(-)
create mode 100644 models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg
create mode 100644 modules/model_sana.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 35d3fcbe9..3a07f154c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,9 +1,19 @@
# Change Log for SD.Next
-## Update for 2024-12-15
+## Update for 2024-12-16
+
+- Sana: both 1.6B and 0.6B
+- ControlNet: better Union results, support for ProMax and Tile
+- FreeScale: optimized iterative generation of images at progressively larger scales
+- Samplers: UniPC, DEIS, SA, DPM-Multistep: add FlowMatch sigma method and prediction type
### New models and integrations
+- [NVLabs Sana](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px)
+  **Sana** can synthesize high-resolution images with strong text-image alignment, using **Gemma2** as the text encoder
+ support for both 1.6B and 0.6B models
+ to use, select from *networks -> models -> reference* and models will be auto-downloaded on first use
+ *reference values*: sampler: default, width/height: 1024, guidance scale: 4.5, attention guidance: 3.0, adaptive scaling: 0.0
- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/)
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
**Canny** & **Depth** are optimized versions of *Flux-dev* for their respective tasks: they are *not* ControlNets that work on top of a model
@@ -98,6 +108,7 @@
- **IPEX**: update to IPEX 2.5.10+xpu
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
+ - UniPC, DEIS, SA, DPM-Multistep: allow FlowMatch method
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- Euler FlowMatch: allow using timestep presets to set sigmas
- DPM FlowMatch: update all and add sigma methods
diff --git a/html/reference.json b/html/reference.json
index 4a549586f..8a0965697 100644
--- a/html/reference.json
+++ b/html/reference.json
@@ -180,6 +180,19 @@
"extras": "sampler: Default, cfg_scale: 3.5"
},
+ "NVLabs Sana 1.6B": {
+ "path": "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
+ "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
+ "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
+ "skip": true
+ },
+ "NVLabs Sana 0.6B": {
+ "path": "Efficient-Large-Model/Sana_600M_1024px_diffusers",
+ "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
+ "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
+ "skip": true
+ },
+
"VectorSpaceLab OmniGen v1": {
"path": "Shitao/OmniGen-v1",
"desc": "OmniGen is a unified image generation model that can generate a wide range of images from multi-modal prompts. It is designed to be simple, flexible and easy to use.",
diff --git a/installer.py b/installer.py
index 18a8ad1f1..a12b09d4d 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '63243406ba5510c10d5cac931882918ceba926f9' # diffusers commit hash
+ sha = '5fb3a985173efaae7ff381b9040c386751d643da' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg b/models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..654f854034544bc8fafa5a0de21b54c4b3862709
GIT binary patch
literal 53061
ztvQp-iy6zxjt>Tj?gTR2INFJ;)Z{%h!~!6)TUeTvP|Lz?wX$y@?-
zv3yK?v8+z~E&Fds9UZJ<>^37Z0zHJ3-VgghVXg9O`i!+9dC`)MIsl50uih}+usI78
zdsn4?g-@v?V(E=#@$xRKKlv9q{hHd%%g*jGE{5Hd45L_A*$OCZyp$|$B;a$^DBd06
zt?C}D^)2@Hem?1YAP&etW93BIe)^ZWK$r{4?^+a*PT^jOto&Vln1b)So6qVAD%0z|f
zu2hM);r%C?^8*GQ>}Z854B7qc&blYMQ3?q(I0^(QPMg3tzB3(pTdYNZCmxNiwop=r
zN7sy^^KCVDSD`_BwYSk=K5JG$DTi5XZ(+!7wC_4U+!c4wS(YxCDMH^YL!M$nmWG->
zKUQ}KEHb(nC$WA8jB$~}+5g)?P#1*NPus=|_T-%Pi2rD*>R~b6fp$*#2aPUR^oBEi
zv5S@_BH*w}9y0`Px!%!cK3eQQZj<9})0d)%JDc%yFdAQTbj9yUU@Om#&V`N#C*vy{
zyk*adMI*b9z)w6&qLa`8=(a$-eydBLb2Fj%gJ{W5Ouy*ER7&zS%mTx0CUtdJs8H--
zXfFHbu?PAb<*pObJ_WrkNK++SNA{v?YNu5`6e1zr}-4{#L}_K=fW{4{___(&3XBEXOif&lJ(QxOv3!RA_x={
zBFYZIQ=Hj%kKrRIMFgRy7;C=}@4woch29N2t0;|(-lii0Y&t84yq%Gs1`MWW_q^VJ
z%10$cD=xmx7?0gz>FmR~m*GaUxT
z