Merge pull request #3642 from vladmandic/dev

Merge dev to master
vladmandic · Dec 24, 2024 · 123bf1f · 123bf1f
2 parents a2f5a39 + 56b29a3
commit 123bf1f
Show file tree

Hide file tree

Showing 204 changed files with 11,987 additions and 1,739 deletions.
diff --git a/.eslintrc.json b/.eslintrc.json
@@ -37,14 +37,19 @@
     "object-curly-newline":"off",
     "prefer-rest-params":"off",
     "prefer-destructuring":"off",
-    "radix":"off"
+    "radix":"off",
+    "node/shebang": "off"
   },
   "globals": {
     // asssets
     "panzoom": "readonly",
-    // script.js
+    // logger.js
     "log": "readonly",
     "debug": "readonly",
+    "error": "readonly",
+    "xhrGet": "readonly",
+    "xhrPost": "readonly",
+    // script.js
     "gradioApp": "readonly",
     "executeCallbacks": "readonly",
     "onAfterUiUpdate": "readonly",
@@ -87,7 +92,6 @@
     // settings.js
     "registerDragDrop": "readonly",
     // extraNetworks.js
-    "requestGet": "readonly",
     "getENActiveTab": "readonly",
     "quickApplyStyle": "readonly",
     "quickSaveStyle": "readonly",

diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -106,10 +106,14 @@ body:
         - StableDiffusion 1.5
         - StableDiffusion 2.1
         - StableDiffusion XL
-        - StableDiffusion 3
-        - PixArt
+        - StableDiffusion 3.x
         - StableCascade
+        - FLUX.1
+        - PixArt
         - Kandinsky
+        - Playground
+        - AuraFlow
+        - Any Video Model
         - Other
       default: 0
     validations:

diff --git a/.pylintrc b/.pylintrc
@@ -13,6 +13,7 @@ ignore-paths=/usr/lib/.*$,
              modules/control/units,
              modules/ctrlx,
              modules/dml,
+             modules/freescale,
              modules/ggml,
              modules/hidiffusion,
              modules/hijack,

diff --git a/.ruff.toml b/.ruff.toml
@@ -7,6 +7,7 @@ exclude = [
     "modules/consistory",
     "modules/control/proc",
     "modules/control/units",
+    "modules/freescale",
     "modules/ggml",
     "modules/hidiffusion",
     "modules/hijack",

diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 <div align="center">
-<img src="https://github.com/vladmandic/automatic/blob/master/html/logo-transparent.png" width=200 alt="SD.Next">
+<img src="https://github.com/vladmandic/automatic/raw/master/html/logo-transparent.png" width=200 alt="SD.Next">
 
 **Image Diffusion implementation with advanced features**
 
@@ -8,15 +8,16 @@
 [![Discord](https://img.shields.io/discord/1101998836328697867?logo=Discord&svg=true)](https://discord.gg/VjvR2tabEX)
 [![Sponsors](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/vladmandic)
 
-[Wiki](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.gg/VjvR2tabEX) | [Changelog](CHANGELOG.md)
+[Docs](https://vladmandic.github.io/sdnext-docs/) | [Wiki](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.gg/VjvR2tabEX) | [Changelog](CHANGELOG.md)
 
 </div>
 </br>
 
 ## Table of contents
 
+- [Documentation](https://vladmandic.github.io/sdnext-docs/)
 - [SD.Next Features](#sdnext-features)
-- [Model support](#model-support)
+- [Model support](#model-support) and [Specifications](https://vladmandic.github.io/sdnext-docs/Models/)
 - [Platform support](#platform-support)
 - [Getting started](#getting-started)
 
@@ -25,7 +26,7 @@
 All individual features are not listed here, instead check [ChangeLog](CHANGELOG.md) for full list of changes
 - Multiple UIs!  
   ▹ **Standard | Modern**  
-- Multiple diffusion models!  
+- Multiple [diffusion models](https://vladmandic.github.io/sdnext-docs/Model-Support/)!  
 - Built-in Control for Text, Image, Batch and video processing!  
 - Multiplatform!  
  ▹ **Windows | Linux | MacOS | nVidia | AMD | IntelArc/IPEX | DirectML | OpenVINO | ONNX+Olive | ZLUDA**
@@ -34,9 +35,7 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG
 - Platform specific autodetection and tuning performed on install  
 - Optimized processing with latest `torch` developments with built-in support for `torch.compile`  
   and multiple compile backends: *Triton, ZLUDA, StableFast, DeepCache, OpenVINO, NNCF, IPEX, OneDiff*  
-- Improved prompt parser  
 - Built-in queue management  
-- Enterprise level logging and hardened API  
 - Built in installer with automatic updates and dependency management  
 - Mobile compatible  
 
@@ -49,42 +48,13 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG
 
 ![screenshot-modernui](https://github.com/user-attachments/assets/39e3bc9a-a9f7-4cda-ba33-7da8def08032)
 
-For screenshots and informations on other available themes, see [Themes Wiki](https://github.com/vladmandic/automatic/wiki/Themes)
+For screenshots and information on other available themes, see [Themes](https://vladmandic.github.io/sdnext-docs/Themes/)
 
 <br>
 
 ## Model support
 
-Additional models will be added as they become available and there is public interest in them  
-See [models overview](https://github.com/vladmandic/automatic/wiki/Models) for details on each model, including their architecture, complexity and other info  
-
-- [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)*
-- [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models), [StabilityAI Stable Diffusion 3.0](https://stability.ai/news/stable-diffusion-3-medium) Medium, [StabilityAI Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) Medium, Large, Large Turbo
-- [StabilityAI Stable Video Diffusion](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid) Base, XT 1.0, XT 1.1
-- [StabilityAI Stable Cascade](https://github.com/Stability-AI/StableCascade) *Full* and *Lite*
-- [Black Forest Labs FLUX.1](https://blackforestlabs.ai/announcing-black-forest-labs/) Dev, Schnell  
-- [AuraFlow](https://huggingface.co/fal/AuraFlow)
-- [AlphaVLLM Lumina-Next-SFT](https://huggingface.co/Alpha-VLLM/Lumina-Next-SFT-diffusers)  
-- [Playground AI](https://huggingface.co/playgroundai/playground-v2-256px-base) *v1, v2 256, v2 512, v2 1024 and latest v2.5*
-- [Tencent HunyuanDiT](https://github.com/Tencent/HunyuanDiT)
-- [OmniGen](https://arxiv.org/pdf/2409.11340)  
-- [Meissonic](https://github.com/viiika/Meissonic)  
-- [Kwai Kolors](https://huggingface.co/Kwai-Kolors/Kolors)  
-- [CogView 3+](https://huggingface.co/THUDM/CogView3-Plus-3B)
-- [LCM: Latent Consistency Models](https://github.com/openai/consistency_models)
-- [aMUSEd](https://huggingface.co/amused/amused-256) 256 and 512
-- [Segmind Vega](https://huggingface.co/segmind/Segmind-Vega), [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B), [Segmind SegMoE](https://github.com/segmind/segmoe) *SD and SD-XL*, [Segmind SD Distilled](https://huggingface.co/blog/sd_distillation) *(all variants)*
-- [Kandinsky](https://github.com/ai-forever/Kandinsky-2) *2.1 and 2.2 and latest 3.0*
-- [PixArt-α XL 2](https://github.com/PixArt-alpha/PixArt-alpha) *Medium and Large*, [PixArt-Σ](https://github.com/PixArt-alpha/PixArt-sigma)
-- [Warp Wuerstchen](https://huggingface.co/blog/wuertschen)
-- [Tsinghua UniDiffusion](https://github.com/thu-ml/unidiffuser)
-- [DeepFloyd IF](https://github.com/deep-floyd/IF) *Medium and Large*
-- [ModelScope T2V](https://huggingface.co/damo-vilab/text-to-video-ms-1.7b)
-- [BLIP-Diffusion](https://dxli94.github.io/BLIP-Diffusion-website/)
-- [KOALA 700M](https://github.com/youngwanLEE/sdxl-koala)
-- [VGen](https://huggingface.co/ali-vilab/i2vgen-xl)
-- [SDXS](https://github.com/IDKiro/sdxs)
-- [Hyper-SD](https://huggingface.co/ByteDance/Hyper-SD)
+SD.Next supports a broad range of models: [supported models](https://vladmandic.github.io/sdnext-docs/Model-Support/) and [model specs](https://vladmandic.github.io/sdnext-docs/Models/)  
 
 ## Platform support
 
@@ -97,47 +67,29 @@ See [models overview](https://github.com/vladmandic/automatic/wiki/Models) for d
 - Any GPU or device compatible with **OpenVINO** libraries on both *Windows and Linux*  
 - *Apple M1/M2* on *OSX* using built-in support in Torch with **MPS** optimizations  
 - *ONNX/Olive*  
-- *AMD* GPUs on Windows using **ZLUDA** libraries
+- *AMD* GPUs on Windows using **ZLUDA** libraries  
 
 ## Getting started
 
-- Get started with **SD.Next** by following the [installation instructions](https://github.com/vladmandic/automatic/wiki/Installation)  
-- For more details, check out [advanced installation](https://github.com/vladmandic/automatic/wiki/Advanced-Install) guide  
-- List and explanation of [command line arguments](https://github.com/vladmandic/automatic/wiki/CLI-Arguments)
+- Get started with **SD.Next** by following the [installation instructions](https://vladmandic.github.io/sdnext-docs/Installation/)  
+- For more details, check out [advanced installation](https://vladmandic.github.io/sdnext-docs/Advanced-Install/) guide  
+- List and explanation of [command line arguments](https://vladmandic.github.io/sdnext-docs/CLI-Arguments/)
 - Install walkthrough [video](https://www.youtube.com/watch?v=nWTnTyFTuAs)
 
 > [!TIP]
 > And for platform specific information, check out  
-> [WSL](https://github.com/vladmandic/automatic/wiki/WSL) | [Intel Arc](https://github.com/vladmandic/automatic/wiki/Intel-ARC) | [DirectML](https://github.com/vladmandic/automatic/wiki/DirectML) | [OpenVINO](https://github.com/vladmandic/automatic/wiki/OpenVINO) | [ONNX & Olive](https://github.com/vladmandic/automatic/wiki/ONNX-Runtime) | [ZLUDA](https://github.com/vladmandic/automatic/wiki/ZLUDA) | [AMD ROCm](https://github.com/vladmandic/automatic/wiki/AMD-ROCm) | [MacOS](https://github.com/vladmandic/automatic/wiki/MacOS-Python.md) | [nVidia](https://github.com/vladmandic/automatic/wiki/nVidia)
+> [WSL](https://vladmandic.github.io/sdnext-docs/WSL/) | [Intel Arc](https://vladmandic.github.io/sdnext-docs/Intel-ARC/) | [DirectML](https://vladmandic.github.io/sdnext-docs/DirectML/) | [OpenVINO](https://vladmandic.github.io/sdnext-docs/OpenVINO/) | [ONNX & Olive](https://vladmandic.github.io/sdnext-docs/ONNX-Runtime/) | [ZLUDA](https://vladmandic.github.io/sdnext-docs/ZLUDA/) | [AMD ROCm](https://vladmandic.github.io/sdnext-docs/AMD-ROCm/) | [MacOS](https://vladmandic.github.io/sdnext-docs/MacOS-Python/) | [nVidia](https://vladmandic.github.io/sdnext-docs/nVidia/) | [Docker](https://vladmandic.github.io/sdnext-docs/Docker/)
 
 > [!WARNING]
-> If you run into issues, check out [troubleshooting](https://github.com/vladmandic/automatic/wiki/Troubleshooting) and [debugging](https://github.com/vladmandic/automatic/wiki/Debug) guides  
+> If you run into issues, check out [troubleshooting](https://vladmandic.github.io/sdnext-docs/Troubleshooting/) and [debugging](https://vladmandic.github.io/sdnext-docs/Debug/) guides  
 
 > [!TIP]
-> All command line options can also be set via env variable
+> All command line options can also be set via env variable  
 > For example `--debug` is same as `set SD_DEBUG=true`  
 
-## Backend support
-
-**SD.Next** supports two main backends: *Diffusers* and *Original*:
-
-- **Diffusers**: Based on new [Huggingface Diffusers](https://huggingface.co/docs/diffusers/index) implementation  
-  Supports *all* models listed below  
-  This backend is set as default for new installations  
-- **Original**: Based on [LDM](https://github.com/Stability-AI/stablediffusion) reference implementation and significantly expanded on by [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui)  
-  This backend and is fully compatible with most existing functionality and extensions written for *A1111 SDWebUI*  
-  Supports **SD 1.x** and **SD 2.x** models  
-  All other model types such as *SD-XL, LCM, Stable Cascade, PixArt, Playground, Segmind, Kandinsky, etc.* require backend **Diffusers**  
-
-### Collab
-
-- We'd love to have additional maintainers (with comes with full repo rights). If you're interested, ping us!  
-- In addition to general cross-platform code, desire is to have a lead for each of the main platforms  
-This should be fully cross-platform, but we'd really love to have additional contributors and/or maintainers to join and help lead the efforts on different platforms  
-
 ### Credits
 
-- Main credit goes to [Automatic1111 WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) for original codebase  
+- Main credit goes to [Automatic1111 WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) for the original codebase  
 - Additional credits are listed in [Credits](https://github.com/AUTOMATIC1111/stable-diffusion-webui/#credits)  
 - Licenses for modules are listed in [Licenses](html/licenses.html)  
 
@@ -154,8 +106,8 @@ This should be fully cross-platform, but we'd really love to have additional con
 
 ### Docs
 
-If you're unsure how to use a feature, best place to start is [Wiki](https://github.com/vladmandic/automatic/wiki) and if its not there,  
-check [ChangeLog](CHANGELOG.md) for when feature was first introduced as it will always have a short note on how to use it  
+If you're unsure how to use a feature, the best place to start is [Docs](https://vladmandic.github.io/sdnext-docs/) and if it's not there,  
+check [ChangeLog](https://vladmandic.github.io/sdnext-docs/CHANGELOG/) for when the feature was first introduced as it will always have a short note on how to use it  
 
 ### Sponsors
 

diff --git a/TODO.md b/TODO.md
@@ -2,21 +2,30 @@
 
 Main ToDo list can be found at [GitHub projects](https://github.com/users/vladmandic/projects)
 
-## Future Candidates
+## Pending
 
-- SD35 IPAdapter: <https://github.com/huggingface/diffusers/issues/9966>
-- SD35 LoRA: <https://github.com/huggingface/diffusers/issues/9950>
-- Flux IPAdapter: <https://github.com/huggingface/diffusers/issues/9825>
-- Flux Fill/ControlNet/Redux: <https://github.com/huggingface/diffusers/pull/9985>
-- Flux NF4: <https://github.com/huggingface/diffusers/issues/9996>
-- SANA: <https://github.com/huggingface/diffusers/pull/9982>
+- LoRA direct with caching
+- Previewer issues
+- Redesign postprocessing
 
-## Other
+## Future Candidates
 
+- Flux NF4 loader: <https://github.com/huggingface/diffusers/issues/9996>
 - IPAdapter negative: <https://github.com/huggingface/diffusers/discussions/7167>
 - Control API enhance scripts compatibility
+- PixelSmith: <https://github.com/Thanos-DB/Pixelsmith>
 
-## Workaround in place
+## Code TODO
 
-- GGUF <https://github.com/huggingface/diffusers/issues/9487>
-- FlowMatch <https://github.com/huggingface/diffusers/issues/9607> <https://github.com/huggingface/diffusers/issues/9924>
+- TODO install: python 3.12.4 or higher causes a mess with pydantic
+- TODO install: enable ROCm for windows when available
+- TODO resize image: enable full VAE mode for resize-latent
+- TODO processing: remove duplicate mask params
+- TODO flux: fix loader for civitai nf4 models
+- TODO model loader: implement model in-memory caching
+- TODO hypertile: vae breaks when using non-standard sizes
+- TODO model load: force-reloading entire model as loading transformers only leads to massive memory usage
+- TODO lora load: direct with bnb
+- TODO lora make: support quantized flux
+- TODO control: support scripts via api
+- TODO modernui: monkey-patch for missing tabs.select event
diff --git a/cli/api-model.js b/cli/api-model.js
@@ -0,0 +1,30 @@
+#!/usr/bin/env node
+
+const sd_url = process.env.SDAPI_URL || 'http://127.0.0.1:7860'; // server base url; falls back to the local default when SDAPI_URL is unset or empty
+const sd_username = process.env.SDAPI_USR; // basic-auth user name; auth header is only attempted when both user and password are set
+const sd_password = process.env.SDAPI_PWD; // basic-auth password
+const models = [ // checkpoint paths that main() submits one by one as `sd_model_checkpoint`
+  '/mnt/models/stable-diffusion/sd15/lyriel_v16.safetensors',
+  '/mnt/models/stable-diffusion/flux/flux-finesse_v2-f1h-fp8.safetensors',
+  '/mnt/models/stable-diffusion/sdxl/TempestV0.1-Artistic.safetensors',
+];
+
+async function options(data) {
+  const method = 'POST';
+  const headers = new Headers();
+  const body = JSON.stringify(data);
+  headers.set('Content-Type', 'application/json');
+  if (sd_username && sd_password) headers.set({ Authorization: `Basic ${btoa('sd_username:sd_password')}` });
+  const res = await fetch(`${sd_url}/sdapi/v1/options`, { method, headers, body });
+  return res;
+}
+
+async function main() {
+  for (const model of models) {
+    console.log('model:', model);
+    const res = await options({ sd_model_checkpoint: model });
+    console.log('result:', res);
+  }
+}
+
+main();
diff --git a/cli/api-pulid.js b/cli/api-pulid.js
@@ -10,12 +10,13 @@ const argparse = require('argparse');
 const sd_url = process.env.SDAPI_URL || 'http://127.0.0.1:7860';
 const sd_username = process.env.SDAPI_USR;
 const sd_password = process.env.SDAPI_PWD;
+let args = {};
 
 function b64(file) {
   const data = fs.readFileSync(file);
-  const b64 = Buffer.from(data).toString('base64');
+  const b64str = Buffer.from(data).toString('base64');
   const ext = path.extname(file).replace('.', '');
-  str = `data:image/${ext};base64,${b64}`;
+  const str = `data:image/${ext};base64,${b64str}`;
   // console.log('b64:', ext, b64.length);
   return str;
 }
@@ -39,7 +40,16 @@ function options() {
   if (args.pulid) {
     const b64image = b64(args.pulid);
     opt.script_name = 'pulid';
-    opt.script_args = [b64image, 0.9];
+    opt.script_args = [
+      b64image, // b64 encoded image, required param
+      0.9, // strength, optional
+      20, // zero, optional
+      'dpmpp_sde', // sampler, optional
+      'v2', // ortho, optional
+      true, // restore (disable pulid after run), optional
+      true, // offload, optional
+      'v1.1', // version, optional
+    ];
   }
   // console.log('options:', opt);
   return opt;
@@ -53,8 +63,8 @@ function init() {
   parser.add_argument('--height', { type: 'int', help: 'height' });
   parser.add_argument('--pulid', { type: 'str', help: 'pulid init image' });
   parser.add_argument('--output', { type: 'str', help: 'output path' });
-  const args = parser.parse_args();
-  return args
+  const parsed = parser.parse_args();
+  return parsed;
 }
 
 async function main() {
@@ -73,12 +83,12 @@ async function main() {
     console.log('result:', json.info);
     for (const i in json.images) { // eslint-disable-line guard-for-in
       const file = args.output || `/tmp/test-${i}.jpg`;
-      const data = atob(json.images[i])
+      const data = atob(json.images[i]);
       fs.writeFileSync(file, data, 'binary');
       console.log('image saved:', file);
     }
   }
 }
 
-const args = init();
+args = init();
 main();
diff --git a/cli/full-test.sh b/cli/full-test.sh
@@ -1,5 +1,8 @@
 #!/usr/bin/env bash
 
+node cli/api-txt2img.js
+node cli/api-pulid.js
+
 source venv/bin/activate
 echo image-exif
 python cli/api-info.py --input html/logo-bg-0.jpg

diff --git a/cli/load-unet.py b/cli/load-unet.py
@@ -33,13 +33,13 @@ def set_module_tensor(
             stats.dtypes[value.dtype] = 0
         stats.dtypes[value.dtype] += 1
         if name in module._buffers: # pylint: disable=protected-access
-            module._buffers[name] = value.to(device=device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
+            module._buffers[name] = value.to(device=device, dtype=dtype) # pylint: disable=protected-access
             if 'buffers' not in stats.weights:
                 stats.weights['buffers'] = 0
             stats.weights['buffers'] += 1
         elif value is not None:
             param_cls = type(module._parameters[name]) # pylint: disable=protected-access
-            module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
+            module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype) # pylint: disable=protected-access
             if 'parameters' not in stats.weights:
                 stats.weights['parameters'] = 0
             stats.weights['parameters'] += 1

diff --git a/configs/flux/vae/config.json b/configs/flux/vae/config.json
@@ -14,7 +14,7 @@
     "DownEncoderBlock2D",
     "DownEncoderBlock2D"
   ],
-  "force_upcast": true,
+  "force_upcast": false,
   "in_channels": 3,
   "latent_channels": 16,
   "latents_mean": null,

diff --git a/configs/sd15/vae/config.json b/configs/sd15/vae/config.json
@@ -14,6 +14,7 @@
     "DownEncoderBlock2D",
     "DownEncoderBlock2D"
   ],
+  "force_upcast": false,
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,