Skip to content

Commit

Permalink
Merge pull request #767 from yevgeny-shnaidman/yevgeny/amd-bootc-6.1.2
Browse files Browse the repository at this point in the history
updating AMD bootc image
  • Loading branch information
rhatdan authored Aug 29, 2024
2 parents 9f06613 + 8c51b1b commit fc758b5
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 41 deletions.
87 changes: 54 additions & 33 deletions training/amd-bootc/Containerfile
Original file line number Diff line number Diff line change
@@ -1,43 +1,49 @@
# Define the images to be used
ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-amd:latest"
ARG BASEIMAGE="quay.io/centos-bootc/centos-bootc:stream9"
ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/nvidia-builder:latest"

FROM ${DRIVER_TOOLKIT_IMAGE} AS builder

COPY repos.d/amdgpu.repo /etc/yum.repos.d/amdgpu.repo
COPY repos.d/RPM-GPG-KEY-AMD-ROCM /etc/pki/rpm-gpg/RPM-GPG-KEY-AMD-ROCM

USER root

RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-AMD-ROCM \
&& dnf install -y amdgpu-dkms \
&& dnf clean all

# Define the base image for the second stage
FROM ${BASEIMAGE}

ARG VENDOR=''
LABEL vendor=${VENDOR}
LABEL org.opencontainers.image.vendor=${VENDOR}

ADD rocm.repo /etc/yum.repos.d/rocm.repo
RUN --mount=type=bind,from=builder,source=/,destination=/tmp/builder,ro \
export KERNEL_VERSION=$(rpm -q --qf '%{VERSION}-%{RELEASE}.%{ARCH}' kernel-core) \
&& rm -f /lib/modules/${KERNEL_VERSION}/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu.ko.xz \
&& cp -r /tmp/builder/lib/modules/${KERNEL_VERSION}/extra /lib/modules/${KERNEL_VERSION}/extra \
&& cp -r /tmp/builder/lib/firmware/updates/amdgpu /lib/firmware/amdgpu \
&& depmod ${KERNEL_VERSION}

ARG EXTRA_RPM_PACKAGES=''
RUN mv /etc/selinux /etc/selinux.tmp && \
dnf install -y \
cloud-init \
pciutils \
rocm-smi \
tmux \
rsync \
skopeo \
${EXTRA_RPM_PACKAGES} \
&& dnf clean all \
&& mv /etc/selinux.tmp /etc/selinux \
&& ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants

# Setup /usr/lib/containers/storage as an additional store for images.
# Remove once the base images have this set by default.
RUN grep -q /usr/lib/containers/storage /etc/containers/storage.conf || \
sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
/etc/containers/storage.conf && \
if [ -f "/run/.input/ilab" ]; then \
cp /run/.input/ilab /usr/bin/ilab; \
else \
curl -o /usr/bin/ilab "https://raw.githubusercontent.com/containers/ai-lab-recipes/main/training/ilab-wrapper/ilab"; \
fi \
&& chmod +x /usr/bin/ilab
COPY repos.d/rocm.repo /etc/yum.repos.d/rocm.repo
COPY repos.d/RPM-GPG-KEY-AMD-ROCM /etc/pki/rpm-gpg/RPM-GPG-KEY-AMD-ROCM

ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-amd:latest"
RUN rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-AMD-ROCM \
&& mv /etc/selinux /etc/selinux.tmp \
&& dnf install -y \
cloud-init \
pciutils \
rocm-smi \
rsync \
skopeo \
tmux \
${EXTRA_RPM_PACKAGES} \
&& dnf clean all \
&& mv /etc/selinux.tmp /etc/selinux \
&& ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants

ARG SSHPUBKEY

Expand All @@ -46,20 +52,35 @@ ARG SSHPUBKEY
RUN if [ -n "${SSHPUBKEY}" ]; then \
set -eu; mkdir -p /usr/ssh && \
echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
fi

RUN sed -i 's/__REPLACE_TRAIN_DEVICE__/cuda/' /usr/bin/ilab
RUN sed -i 's/__REPLACE_CONTAINER_DEVICE__/nvidia.com\/gpu=all/' /usr/bin/ilab
RUN sed -i "s%__REPLACE_CONTAINER_NAME__%${INSTRUCTLAB_IMAGE}%" /usr/bin/ilab
# Setup /usr/lib/containers/storage as an additional store for images.
# Remove once the base images have this set by default.
RUN grep -q /usr/lib/containers/storage /etc/containers/storage.conf || \
sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
/etc/containers/storage.conf

ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/instructlab-amd:latest"
ARG INSTRUCTLAB_IMAGE_PULL_SECRET="instructlab-amd-pull"

COPY duplicated/ilab-wrapper/ilab /usr/bin/ilab
RUN chmod +x /usr/bin/ilab \
&& sed -i "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" /usr/bin/ilab

# Added for running as an OCI Container to prevent Overlay on Overlay issues.
VOLUME /var/lib/containers

# Prepull the instructlab image
RUN if [ -f "/run/.input/instructlab-amd/oci-layout" ]; then \
RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
if [ -f "/run/.input/instructlab-amd/oci-layout" ]; then \
IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-amd) && \
podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE}; \
elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
IID=$(sudo podman --root /usr/lib/containers/storage pull --authfile /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson ${INSTRUCTLAB_IMAGE}); \
else \
IID=$(sudo podman --root /usr/lib/containers/storage pull ${INSTRUCTLAB_IMAGE}); \
fi

RUN podman system reset --force 2>/dev/null

LABEL image_version_id="${IMAGE_VERSION_ID}"
3 changes: 2 additions & 1 deletion training/amd-bootc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ bootc: prepare-files
$(ARCH:%=--platform linux/%) \
$(BUILD_ARG_FILE:%=--build-arg-file=%) \
$(EXTRA_RPM_PACKAGES:%=--build-arg EXTRA_RPM_PACKAGES=%) \
$(FROM:%=--from=%) \
$(DRIVER_TOOLKIT_IMAGE:%=--build-arg DRIVER_TOOLKIT_IMAGE=%) \
$(FROM:%=--build-arg BASEIMAGE=%) \
$(INSTRUCTLAB_IMAGE:%=--build-arg INSTRUCTLAB_IMAGE=%) \
$(SOURCE_DATE_EPOCH:%=--timestamp=%) \
$(VENDOR:%=--build-arg VENDOR=%) \
Expand Down
144 changes: 144 additions & 0 deletions training/amd-bootc/duplicated/ilab-wrapper/ilab
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/bin/bash

# Print all arguments on standard error, joined by spaces on a single line.
echo-err() {
    echo "$@" >&2
}

# Abort unless exactly one subuid range was found for the user.
#
# Arguments:
#   $1 - the matching subuid ranges, one range per line (may be empty)
#   $2 - the user name, used only in the error messages
#
# Returns 0 when exactly one range was passed. Otherwise a diagnostic is
# written to stderr and the whole script terminates with exit status 1.
verify_range() {
    # Keep the working variables local so they do not leak into the caller.
    local subuid_range="$1"
    local username="$2"
    local range_count=0

    # A here-string of an empty value still produces one (empty) line, so
    # lines are only counted when something was actually passed in.
    if [[ -n "$subuid_range" ]]; then
        range_count=$(wc -l <<<"$subuid_range")
    fi

    # Compare numerically: a wc implementation that pads its output with
    # blanks would defeat a plain string comparison.
    if (( range_count == 0 )); then
        echo-err "No /etc/subuid range found for user $username ($UID)"
        exit 1
    elif (( range_count != 1 )); then
        # TODO: Handle multiple subuid ranges. But for now, hard fail
        echo-err "Multiple /etc/subuid ranges found for user $username ($UID), this is currently unsupported:"
        echo-err "$subuid_range"
        exit 1
    fi
}

# Remind users of RHEL hosts that are not connected to Red Hat Insights.
#
# Returns quietly when /etc/insights-client/machine-id or
# /etc/ilab/insights-opt-out exists, or when the ID recorded in
# /etc/os-release is not "rhel". Otherwise the connection instructions are
# printed and the script exits with status 1.
check_insights() {
    local ID marker

    # Either marker file is enough to skip the reminder.
    for marker in /etc/insights-client/machine-id /etc/ilab/insights-opt-out; do
        if [[ -f "$marker" ]]; then
            return 0
        fi
    done

    # Evaluate only the ID= assignment from os-release; the shell takes care
    # of any quoting around the value.
    eval "$(grep ^ID= /etc/os-release)"
    if [[ "$ID" != "rhel" ]]; then
        return 0
    fi

    cat << EOF
This host is not connected to Red Hat Insights.
To connect this host to Red Hat Insights run the following command:
sudo rhc connect --organization <org_id> --activation-key <your_activation_key>
To generate an Activation Key:
https://console.redhat.com/insights/connector/activation-keys (this page will also display your Organization ID).
For more information on Red Hat Insights, please visit:
https://docs.redhat.com/en/documentation/subscription_central/1-latest/html/getting_started_with_activation_keys_on_the_hybrid_cloud_console/assembly-creating-managing-activation-keys
EOF
    exit 1
}

# Stop here on RHEL hosts that still need to be connected to Insights.
check_insights

# Template values replaced by container build
IMAGE_NAME="__REPLACE_IMAGE_NAME__"

# "shell" as the first argument runs bash in the container with no
# arguments; any other invocation forwards every argument to ilab.
if [[ "$1" = "shell" ]]; then
    ENTRYPOINT=bash
    PARAMS=()
else
    ENTRYPOINT="ilab"
    PARAMS=("$@")
fi

# ILAB_HOME, when set, replaces the home directory used from here on.
[[ -n "$ILAB_HOME" ]] && HOME="$ILAB_HOME"

# Create the per-user directories under the (possibly overridden) home.
mkdir -p "$HOME/.cache" "$HOME/.config" "$HOME/.local"

# Extra volumes can be mounted into the container through the
# ILAB_ADDITIONAL_MOUNTS environment variable.
#
# Example ILAB_ADDITIONAL_MOUNTS usage:
#
# ILAB_ADDITIONAL_MOUNTS="/host/path:/container/path /host/path2:/container/path2"
#
# Paths containing spaces can be quoted:
#
# ILAB_ADDITIONAL_MOUNTS="/host/path:/container/path '/host/path with spaces':/container/path"
ADDITIONAL_MOUNTS=()
ADDITIONAL_MOUNT_OPTIONS=()
if [[ -n "${ILAB_ADDITIONAL_MOUNTS}" ]]; then
    # NOTE(review): eval lets the shell split the value while honouring the
    # user's quoting, but it also executes anything embedded in the variable.
    eval "ADDITIONAL_MOUNTS=(${ILAB_ADDITIONAL_MOUNTS})"

    # Each mount specification becomes its own "-v <spec>" option pair.
    for mount_spec in "${ADDITIONAL_MOUNTS[@]}"; do
        ADDITIONAL_MOUNT_OPTIONS+=(-v "$mount_spec")
    done
fi

# Mount the registry pull-secret into the container when one is present.
# For a normal user it lives under /run/user (XDG_RUNTIME_DIR); for root it is
# under /run/containers. The first candidate that exists is used.
for authfile in \
    "${XDG_RUNTIME_DIR}/containers/auth.json" \
    "/run/user/${UID}/containers/auth.json" \
    "/run/containers/${UID}/auth.json"
do
    [[ -f "$authfile" ]] || continue
    ADDITIONAL_MOUNT_OPTIONS+=(-v "${authfile}:/run/containers/0/auth.json")
    break
done

# The container is started through sudo so that it can use root's container
# storage, where the ilab image has been pre-pulled. For security reasons,
# UID 0 inside the container is mapped to the invoking user's UID, and the
# remaining UIDs to that user's /etc/subuid range, so the container
# effectively runs as the current user.
#
# In the future podman will run as the current user directly, once there is a
# reasonable way for that user to reach root's container storage.
if [[ "$UID" != 0 ]]; then
    CURRENT_USER_NAME=$(id --user --name)

    # Select the /etc/subuid entries whose first field is either the user's
    # name or numeric UID, and emit fields two and three joined by a colon.
    CURRENT_USER_SUBUID_RANGE=$(
        awk \
            --field-separator ':' \
            --assign current_user="$CURRENT_USER_NAME" \
            --assign current_uid="$UID" \
            '$1 == current_user || $1 == current_uid {print $2 ":" $3}' \
            /etc/subuid
    )

    # Exits the script unless exactly one range was found.
    verify_range "$CURRENT_USER_SUBUID_RANGE" "$CURRENT_USER_NAME"

    IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=(
        --uidmap "0:$UID"
        --uidmap "1:$CURRENT_USER_SUBUID_RANGE"
    )
else
    # Already root: no UID mapping is required.
    IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=()
fi

# Variables that sudo must keep so the value-less --env options below can
# forward them into the container.
PRESERVE_ENV="VLLM_LOGGING_LEVEL,NCCL_DEBUG,HOME,HF_TOKEN"

# Assemble the command one group at a time; the argument order is significant
# and must stay as is.
PODMAN_COMMAND=(sudo "--preserve-env=$PRESERVE_ENV" podman run --rm -it)
PODMAN_COMMAND+=("${IMPERSONATE_CURRENT_USER_PODMAN_FLAGS[@]}")
# Device nodes handed to the container.
PODMAN_COMMAND+=(--device /dev/kfd --device /dev/dri)
PODMAN_COMMAND+=(--security-opt label=disable --net host)
PODMAN_COMMAND+=(--shm-size 10G)
PODMAN_COMMAND+=(--pids-limit -1)
# The home directory is mounted at the same path inside the container.
PODMAN_COMMAND+=(-v "$HOME:$HOME")
PODMAN_COMMAND+=("${ADDITIONAL_MOUNT_OPTIONS[@]}")
PODMAN_COMMAND+=(--env VLLM_LOGGING_LEVEL --env HOME --env NCCL_DEBUG)
PODMAN_COMMAND+=(--entrypoint "$ENTRYPOINT")
PODMAN_COMMAND+=(--env HF_TOKEN)
PODMAN_COMMAND+=("${IMAGE_NAME}")

# Replace this wrapper process with the container invocation.
exec "${PODMAN_COMMAND[@]}" "${PARAMS[@]}"
52 changes: 52 additions & 0 deletions training/amd-bootc/repos.d/RPM-GPG-KEY-AMD-ROCM
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: GnuPG v1

mQINBFefsSABEADmVqQyRi5bcUs/eG8mnKLdY+V+xuKuHLuujlXinSaMFRO640Md
C2HNYLSd58Z8cB1rKfiN639CZp+SkDWq60cFXDCcX9djT0JmBzsTD/gwoMr16tMY
O+Z2mje2pEYgDJdmYrephhXn29BfebW1IQKdA+4C7l675mJ/T8yVMUNXC0hqfGDA
h1MJUQy/lz1S2fGdjCKX0PiYOnCOyhNa7aTpw9PkZWgEa/s4BhplFZxvLohrCcf6
ks0gUITHfeEhJvj2KurRfL68DgFifGnG+/fsMHgW1Xp19GsnIVaoh6cV7/iFHhrb
6YHI1fdOq/mwOfG8mJnXmDXC/o24Q7mRRwvoJcsT0j+thRirs8trV01mKY+7Hxd2
CamWttibo062pjWN2aEUMPmEU2kmGOupsZtlpqn6SGCd2+6maOPMNEq/F0EWxhul
q6mgezVb8pvJ3bwvph2/lMSgfT9fHs6UIh4i/3rnA5/JaejFonlnS9xEuglKjklj
UoikSPBOwjvoPW2u99WCflURFSXVvuk7Ci+XkbVPIZyD6gFJjeY02Ic5MAv5tj/z
0fpgr/CfwEllms+z7qz768xRweA0kmPTTARdufVTna6EV3K3njxvCIIfnrp1cF6S
e3VrREd98gO0Rmzy74UFqkXl9Tb/+UILx1qVRmOBinwacKGqzo+k9jPUKQARAQAB
tChBTUQgTUxTRSBEZXZPcHMgPGRsLk1MU0UuRGV2T3BzQGFtZC5jb20+iQI+BBMB
AgAoAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAUCYfuRkwUJE8Hh5wAKCRCT
hrSKGmk8XI1AEACSJLVGHCLJOOKz9fbUR4KWl7Gpv0RWccwxhH01jNZTSXUCEnKA
2KYmaqFvrT5szxWILobmCNYtAlbdkpUfb0mMaF3UtTu+1UMOw2ExzxHw1FyA+z6d
vLqDKXLldsOFUfojDUhD5cK6uvONPc1orCf/4ve6wnRG838bAzb4VrFR64IxfPjx
NukH+jo2nEXNpnNv44DEiq65CcObaPuwAVBFnRYD/ByPO4ZArxFXqNzHRxpoZkKv
iwzhbPG4cirioqzRR9y2SsC+a2sO4a/jH0wOL2+n4L86xShYcuCBxXvS/AwrV/aO
JxKOfAUV4VQegAOQz64L+iz7PslNSTILJGdvGcC5Ckgpo6evdWBT7KdGXhzf4S1f
wZjYyP9sfQa7LxqyrkLHZqYt4If4Jmukx7cApBYp1nPnuCQrLU6D4Arq0ZVWQuNV
hbABLeqwdVQcX+vG/Kr/ZC+Vkv3Z8oElwVGAAQ6HNXr/u8ud2bu6iNJ5mcQbM1HD
KTNt5LUrk0p588a8dk0/TyC5xeKSv51iNL+aOVaTr0pRwgaHtEVar2i0FPC1mkr4
1hhIDddx8WLoUt/52f1juyr/4CpL1M5f1cbMVjV6i0kqIEx/hxrryc+fZZQT5R4M
vysxcsh8ttgpABG5vzz2rLOCanmQ4eDdmlugzn/u0ngoDdnC0gEfnVVutLkCDQRX
n7EgARAAlsWVKSOQicuBxBlo3U5tre5whSyAOWHuy6/heGwCkGssTahbIL8pRwOL
5nKJCPCKKJ4YYoZ+Jzer9WTsDRZU/zpQXK9C5WdfF6DN/Fai3lqhgeDDVyF0hUDr
NQigm/w66JEYTGtMcC5PnYv7S6Zrn9WN4anv9n5thNwfsqxpbbg6sAQ2aLHLsW96
myQE9v1s0YoSZYc7rFYBwszE+tFX0kLlyBYSRVns/USQifu66RObO706d8DHp6Ro
vO6WgsTu+0RR2FEUabBx1q6iKe1cqK0FYtWd8tXCpqQBm0zGC6UwTp4Z4GMCX2Pk
3xAMmrItW5kPKCANB+P/8ZoOoZLIX5Fr9axQ496lUh0ZDhOACewJfj9Szk9GN5rq
+2QKnRepatevGBVaN0lCAEwg2q9/9xmrT6CixFrbnw2T6mWHM3jQrvduqmC0c1Cd
uMZBGDKSpjouaN0UKtC+udwWiY7w452pcjCnUjzjk7tR1IarSCnLLYeb+MDCK83M
CFH60SmBfdqjRiTiLas34KSKNnmbfUfrTYswf0Oed/qXAUSlYOCmWl4sV8n+Ebpy
XfY80/fzu95RbpMEZMhUTRtvr64O5jaWM/lFnubnegGTW3Bk/fBR2VRsBx56ZHlc
JH23f6IREjQ1x4B2UsINYfyYpmzb+R4qpMzycBVHv9ipiYQsQ8sAEQEAAYkCJQQY
AQoADwIbDAUCYfuRtwUJE8HiEAAKCRCThrSKGmk8XMAcEACd0jYXjnu7qoEY4U9Q
47X2SeJmWsuTavCrU5AWxjYwWd0mtDqK8EynxDPq7UFs+8+OukqrE++p0bfBbDl9
TwnwmSSdizAZriHMSgeg9GR5KVL4mreNhFQdk/6mTFdlRhi5s7ZuvPayLSMIAWaj
ET5gFMeO1B/ABSpaKEZwQjRcXrto/hCUJ++7qoosblhcgwX7fiqZZbMxcoCEQIQQ
7ZasLxpVtaeDVfetp2zO5F0/e3D/sNbvBrlDofSt6D5V2cmKjLqONFVc6JrzSNeK
k9Gn8UVzAKfRfLaQyDaoFV0MbBf3q111UQQPkvwZYp0lPT6t2/G8zoubwFhHsM31
K5ZBbt0384hI9RJITo9/krXVXLYFeCLcoPKn/fGWgAwyYAYr6C7JcocxTNUyCd1I
AVg4SO/JuC3NWFQK5LhknN/gJkFlLZdB2cWqu9dDIkx1cHXThaM2n/7GSxv7fzrI
Br1jhZjUPWJ2iOd8iHgVEkIEvZql8z+huSxcNemodEN1emmUUoIyY3Fh0lJmozDt
ZPATk3iPpksOApsDVhWXP96RjTYEozYCxgTxCnk+kX/iJIlt53BPNWm9HMTcmtDI
v3s7OEcw0DN3U2VKcL9Q4Sg3uNfhwQsw/xBJaxAHQn5lN/8t0eLt+U653ooEEr0o
ta5TfPumStSQ1UjP8pPny4l+JQ==
=UOE+
-----END PGP PUBLIC KEY BLOCK-----
7 changes: 7 additions & 0 deletions training/amd-bootc/repos.d/amdgpu.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# dnf repository definition for the amdgpu packages.
[amdgpu]
name=amdgpu
# Pinned to the 6.1.2 release tree for EL 9.4 on x86_64.
baseurl=https://repo.radeon.com/amdgpu/6.1.2/el/9.4/main/x86_64/
enabled=1
priority=50
# Package signatures are checked against the key published at gpgkey.
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
7 changes: 7 additions & 0 deletions training/amd-bootc/repos.d/rocm.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# dnf repository definition for the ROCm packages on EL9.
# The repository id and name carry the same release number as the baseurl
# (6.1.2), so the label shown by dnf matches the tree actually installed from.
[ROCm-6.1.2]
name=ROCm6.1.2
baseurl=https://repo.radeon.com/rocm/el9/6.1.2/main
enabled=1
priority=50
# Package signatures are checked against the key published at gpgkey.
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
7 changes: 0 additions & 7 deletions training/amd-bootc/rocm.repo

This file was deleted.

0 comments on commit fc758b5

Please sign in to comment.