ROCm support (#5)
General:

* ROCm support

Misc:

* Update README.md

* Update repositories
sasha0552 authored Dec 29, 2023
1 parent c362dac commit ce338ac
Showing 25 changed files with 240 additions and 49 deletions.
44 changes: 44 additions & 0 deletions .ci/configure.py
@@ -0,0 +1,44 @@
import glob
import jinja2
import sys

def render_template(filepath, **options):
    if filepath.endswith(".jinja2"):
        # read input file
        with open(filepath, "r") as file:
            template = jinja2.Template(file.read())

        # render template
        rendered = template.render(**options)

        # write output file (filepath[:-7] strips the ".jinja2" suffix)
        with open(filepath[:-7], "w") as file:
            file.write(rendered)

def main():
    # by default, use cuda
    cuda = True
    rocm = False

    # enable rocm if specified
    if len(sys.argv) == 2:
        if sys.argv[1] == "rocm":
            cuda = False
            rocm = True

    # list of rendered files
    rendered = []

    # render every file (glob skips hidden files, so .gitignore.jinja2 is added explicitly)
    for filepath in glob.glob("**/*.jinja2", recursive=True) + [".gitignore.jinja2"]:
        # render file
        render_template(filepath, CUDA=cuda, ROCm=rocm, rendered=rendered)

        # add output file to rendered list
        rendered.append(filepath[:-7])

        # print status
        print(f"File '{filepath}' rendered successfully")

if __name__ == "__main__":
    main()
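For reference, a minimal sketch of what this rendering step does. The template string below is a made-up example, not a file from this repository; it mirrors the `{% if CUDA %}` / `{% if ROCm %}` blocks used by the scripts converted in this commit:

```python
import jinja2

# hypothetical template mirroring the CUDA/ROCm blocks in this commit
template = jinja2.Template(
    "{% if CUDA %}pip3 install -r requirements.txt{% endif %}"
    "{% if ROCm %}pip3 install -r requirements-rocm.txt{% endif %}"
)

# flags as set by `python3 .ci/configure.py rocm`
print(template.render(CUDA=False, ROCm=True))
# output: pip3 install -r requirements-rocm.txt
```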
7 changes: 6 additions & 1 deletion .github/workflows/build-iso-cuda.yml
@@ -35,7 +35,7 @@ jobs:
pacman --sync --noconfirm --sysupgrade
# Install required packages
-pacman --sync --noconfirm --needed archiso patch
+pacman --sync --noconfirm --needed archiso patch python python-jinja
# Apply patch to archiso
patch -p0 << 'EOF'
@@ -54,6 +54,11 @@ jobs:
# export build artifacts for netboot
EOF
+# Configure to use CUDA
+pushd /workspace
+python3 .ci/configure.py cuda
+popd
# Build image
mkarchiso -v -m iso -w /workspace/work -o /workspace/out /workspace
69 changes: 69 additions & 0 deletions .github/workflows/build-iso-rocm.yml
@@ -0,0 +1,69 @@
name: Build ISO (ROCm)

on:
  - push
  - pull_request

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Cleanup
        uses: rokibhasansagar/slimhub_actions@main
        with:
          retain: "docker_imgcache,docker_buildkit,docker_imgcache"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Build image
        uses: addnab/docker-run-action@v3
        with:
          image: archlinux:latest
          options: --privileged --volume ${{ github.workspace }}:/workspace
          run: |
            # Exit on error
            set -eu
            # Refresh package databases
            pacman --sync --noconfirm --refresh
            # Upgrade system
            pacman --sync --noconfirm --sysupgrade
            # Install required packages
            pacman --sync --noconfirm --needed archiso patch python python-jinja
            # Apply patch to archiso
            patch -p0 << 'EOF'
            --- /usr/bin/mkarchiso
            +++ /usr/bin/mkarchiso
            @@ -1227,6 +1227,10 @@
                 if [[ -v cert_list ]]; then
                     _cms_sign_artifact "${airootfs_image_filename}"
                 fi
            +
            +    _msg_info 'Removing the pacstrap directory...'
            +    rm -rf -- "${pacstrap_dir:?}/"
            +    _msg_info 'Done!'
             }

             # export build artifacts for netboot
            EOF
            # Configure to use ROCm
            pushd /workspace
            python3 .ci/configure.py rocm
            popd
            # Build image
            mkarchiso -v -m iso -w /workspace/work -o /workspace/out /workspace

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: archiso-output
          path: out/
11 changes: 11 additions & 0 deletions .gitignore
@@ -1,2 +1,13 @@
out/
work/

+# rendered files
+packages.x86_64
+airootfs/root/customize_airootfs.sh
+airootfs/root/customize_airootfs/scripts/1000-axolotl-dependencies.sh
+airootfs/root/customize_airootfs/scripts/0100-koboldcpp-patches.sh
+airootfs/root/customize_airootfs/scripts/1000-sillytavern-extras-dependencies.sh
+airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh
+airootfs/root/customize_airootfs/scripts/1000-text-generation-webui-dependencies.sh
+airootfs/root/customize_airootfs/scripts/0100-automatic-patches.sh
+airootfs/root/customize_airootfs/scripts/9999-cleanup.sh
7 changes: 7 additions & 0 deletions .gitignore.jinja2
@@ -0,0 +1,7 @@
out/
work/

# rendered files
{% for file in rendered %}
{{- file}}
{% endfor %}
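The `{{- file}}` form above uses Jinja2 whitespace control: the leading `-` strips the newline emitted before the expression, so each rendered path lands on its own line with no blank lines in between. A quick sketch of the difference, with hypothetical list entries:

```python
import jinja2

files = ["packages.x86_64", "airootfs/root/customize_airootfs.sh"]

# without whitespace control, the newline after {% for %} is kept,
# producing a blank line before every path
plain = jinja2.Template("{% for f in files %}\n{{ f }}\n{% endfor %}")

# `{{-` strips that preceding newline, giving one path per line
trimmed = jinja2.Template("{% for f in files %}\n{{- f }}\n{% endfor %}")

print(repr(plain.render(files=files)))
print(repr(trimmed.render(files=files)))
```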
10 changes: 6 additions & 4 deletions README.md
@@ -15,18 +15,20 @@ If you would like to see another AI-related project included in ToriLinux, pleas

* Easy setup: just boot the ISO, and you will have a working setup for training and/or inferencing Large Language Models/Stable Diffusion/etc.
* Fully offline training and/or inference.
-* Includes performance state switcher, which reduces GPU temperatures when inference is not running (only automatic & koboldcpp for now).
+* Includes performance state switcher, which reduces GPU temperatures when inference is not running (only NVIDIA, only automatic & koboldcpp for now).

## Usage

To use ToriLinux:
1. Install [Ventoy](https://ventoy.net/en/doc_start.html) on a USB drive.
-2. Download the latest ISO from [workflows](https://github.com/sasha0552/ToriLinux/actions?query=branch%3Amain) and copy it to the USB drive.
+2. Download the latest ISO from workflows ([NVIDIA](https://github.com/sasha0552/ToriLinux/actions/workflows/build-iso-cuda.yml?query=branch%3Amain) / [AMD](https://github.com/sasha0552/ToriLinux/actions/workflows/build-iso-rocm.yml?query=branch%3Amain)) and copy it to the USB drive.
3. Boot from the USB drive (select it as the boot device in BIOS/UEFI).
4. Log in with the username `tori` and password `tori`. You can also use [SSH](https://en.wikipedia.org/wiki/Secure_Shell).

-Please note that ToriLinux currently works only with NVIDIA GPUs. If you would like a ROCm (AMD GPUs) version, please open an issue, and I'll make one.

## Misc

Note that you need pre-downloaded models on a local hard drive or NFS server, or enough RAM and internet connection to download models directly into RAM.

+Note that the following projects are not available in the ROCm version:
+* [axolotl](https://github.com/OpenAccess-AI-Collective/axolotl)
+* [vllm](https://github.com/vllm-project/vllm)
2 changes: 1 addition & 1 deletion airootfs/home/tori/axolotl
2 changes: 1 addition & 1 deletion airootfs/home/tori/vllm
airootfs/root/customize_airootfs.sh.jinja2
@@ -7,6 +7,14 @@ mv /usr/lib/os-release.new /usr/lib/os-release
# set user password
echo "tori:tori" | chpasswd

+# remove any jinja2 files
+find -type f -name "*.jinja2" -print -delete

+{% if ROCm %}
+# remove nvidia-persistenced if rocm
+rm -f /etc/systemd/system/multi-user.target.wants/nvidia-persistenced.service
+{% endif %}

# enter user directory
cd "/home/tori"

@@ -9,7 +9,7 @@

uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
-@@ -712,7 +712,7 @@ struct llama_server_context
+@@ -711,7 +711,7 @@ struct llama_server_context
}

slot->params.stream = json_value(data, "stream", false);
@@ -18,12 +18,12 @@
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
-@@ -2439,7 +2439,7 @@ json oaicompat_completion_params_parse(
-    // Map OpenAI parameters to llama.cpp parameters
+@@ -2446,7 +2446,7 @@ json oaicompat_completion_params_parse(
+    llama_sampling_params default_sparams;
llama_params["model"] = json_value(body, "model", std::string("uknown"));
llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt'
- llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
+ llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
llama_params["temperature"] = json_value(body, "temperature", 0.8);
llama_params["top_k"] = json_value(body, "top_k", 40);
llama_params["top_p"] = json_value(body, "top_p", 0.95);
llama_params["temperature"] = json_value(body, "temperature", 0.0);
llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
llama_params["top_p"] = json_value(body, "top_p", 1.0);
airootfs/root/customize_airootfs/scripts/0100-automatic-patches.sh.jinja2
@@ -14,6 +14,8 @@ pushd "automatic"
sed -i 's/lambda: {"choices": theme.list_themes()}, refresh=theme.refresh_themes/{"choices": ["black-teal"]}/g' modules/shared.py
sed -i 's/shared.opts.motd/False/g' modules/api/api.py

+{% if CUDA %}
# drop pstate in idle
patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0000-automatic-drop-pstate-in-idle.patch"
+{% endif %}
popd
airootfs/root/customize_airootfs/scripts/0100-koboldcpp-patches.sh.jinja2
@@ -3,6 +3,8 @@ set -eu

# koboldcpp patches
pushd "koboldcpp"
+{% if CUDA %}
# drop pstate in idle
patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0000-koboldcpp-drop-pstate-in-idle.patch"
+{% endif %}
popd
4 changes: 2 additions & 2 deletions airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
@@ -3,6 +3,6 @@ set -eu

# vllm patches
pushd "vllm"
-# build for pascal
-patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0100-vllm-build-for-pascal.patch"
+# enable other architectures
+patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0100-vllm-enable-other-archs.patch"
popd
@@ -14,4 +14,10 @@ pushd "automatic"
# install dependencies
python3 launch.py --test
deactivate

+# remove installation config
+rm config.json

+# remove installation log
+rm sdnext.log
popd
airootfs/root/customize_airootfs/scripts/1000-axolotl-dependencies.sh.jinja2
@@ -3,6 +3,7 @@ set -eu

# axolotl dependencies
pushd "axolotl"
+{% if CUDA %}
# disable package caching
export PIP_NO_CACHE_DIR=0

@@ -33,4 +34,5 @@ pushd "axolotl"
# downgrade flash-attn (https://github.com/OpenAccess-AI-Collective/axolotl/issues/911#issuecomment-1868546443)
pip3 install flash-attn==2.3.2
deactivate
+{% endif %}
popd
airootfs/root/customize_airootfs/scripts/1000-sillytavern-extras-dependencies.sh.jinja2
@@ -11,8 +11,17 @@ pushd "SillyTavern-Extras"

# activate venv
source venv/bin/activate
-# install dependencies
+{% if CUDA %}
+# install dependencies (cuda)
pip3 install -r requirements.txt
+{% endif %}

+{% if ROCm %}
+# install dependencies (rocm)
+pip3 install -r requirements-rocm.txt
+{% endif %}

+# install remaining dependencies
pip3 install -r requirements-coqui.txt
pip3 install -r requirements-rvc.txt
deactivate

This file was deleted.

airootfs/root/customize_airootfs/scripts/1000-text-generation-webui-dependencies.sh.jinja2
@@ -0,0 +1,30 @@
#!/bin/bash
set -eu

# text-generation-webui dependencies
pushd "text-generation-webui"
# disable package caching
export PIP_NO_CACHE_DIR=0

# create venv
python3 -m venv venv

# activate venv
source venv/bin/activate
{% if CUDA %}
# install dependencies (cuda)
pip3 install -r requirements.txt
{% endif %}

{% if ROCm %}
# extract pytorch version
index_url=$(grep -o 'https://download.pytorch.org/whl/rocm[0-9.]*' one_click.py)

# install pytorch
pip3 install torch torchvision torchaudio --index-url "$index_url"

# install dependencies (rocm)
pip3 install -r requirements_amd.txt
{% endif %}
deactivate
popd
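The `grep -o` call above scrapes the ROCm wheel index URL out of text-generation-webui's own `one_click.py`, so the installed PyTorch build tracks whatever ROCm version upstream pins. The same extraction in Python, as a sketch (assumes `one_click.py` is present in the current directory):

```python
import re

# mirror of: grep -o 'https://download.pytorch.org/whl/rocm[0-9.]*' one_click.py
with open("one_click.py") as f:
    match = re.search(r"https://download\.pytorch\.org/whl/rocm[0-9.]*", f.read())

if match is None:
    raise SystemExit("no ROCm wheel index URL found in one_click.py")

# e.g. https://download.pytorch.org/whl/rocm5.6
print(match.group(0))
```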
airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh.jinja2
@@ -3,6 +3,7 @@ set -eu

# vllm dependencies
pushd "vllm"
+{% if CUDA %}
# disable package caching
export PIP_NO_CACHE_DIR=0

@@ -43,4 +44,5 @@ pushd "vllm"
# install dependencies
pip3 install -r requirements.txt
deactivate
+{% endif %}
popd
11 changes: 0 additions & 11 deletions airootfs/root/customize_airootfs/scripts/2000-automatic-cleanup.sh

This file was deleted.

airootfs/root/customize_airootfs/scripts/9999-cleanup.sh.jinja2
@@ -9,3 +9,11 @@ rm -fr /home/tori/.config/matplotlib

# keras
rm -fr /home/tori/.keras

+{% if ROCm %}
+# remove axolotl if rocm
+rm -fr /home/tori/axolotl

+# remove vllm if rocm
+rm -fr /home/tori/vllm
+{% endif %}