Skip to content

Commit

Permalink
Install Descheduler, fix startup readywait
Browse files Browse the repository at this point in the history
Descheduler will be used for eve-app rebalancing during
cluster node reboots/upgrades in an upcoming PR.
Wait for longhorn daemonsets to be ready, in preparation for an upcoming PR
that snapshots the single-node /var/lib kube db.
Resolve intermittent failure to import external-boot-image
	Wait for containerd before importing.
	Tighter error checking on import.

Signed-off-by: Andrew Durbin <[email protected]>
  • Loading branch information
andrewd-zededa committed Oct 16, 2024
1 parent 1d2ad7a commit 0d2bd18
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 19 deletions.
1 change: 1 addition & 0 deletions .spdxignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ pkg/rngd/cmd/rngd/vendor/
pkg/wwan/mmagent/vendor/
tools/get-deps/vendor/
pkg/installer/vendor/
pkg/kube/descheduler-job.yaml
4 changes: 4 additions & 0 deletions pkg/kube/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ COPY iscsid.conf /etc/iscsi/
COPY longhorn-generate-support-bundle.sh /usr/bin/
COPY nsmounter /usr/bin/

# descheduler
COPY descheduler-job.yaml /etc/
COPY descheduler-policy-configmap.yaml /etc/

# Containerd config
RUN mkdir -p /etc/containerd
COPY config-k3s.toml /etc/containerd/
Expand Down
72 changes: 53 additions & 19 deletions pkg/kube/cluster-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -300,20 +300,34 @@ check_start_containerd() {
logmsg "Started k3s-containerd at pid:$containerd_pid"
fi
if [ -f /etc/external-boot-image.tar ]; then
# Give containerd a moment to start before importing
for _ in 1 2 3; do
reported_pid=$(/var/lib/k3s/bin/k3s ctr -a /run/containerd-user/containerd.sock info | jq .server.pid)
if [ "$reported_pid" = "$containerd_pid" ]; then
logmsg "containerd online, continue to import"
break
fi
sleep 1
done

# NOTE: https://kubevirt.io/user-guide/virtual_machines/boot_from_external_source/
# Install external-boot-image image to our eve user containerd registry.
# This image contains just kernel and initrd to bootstrap a container image as a VM.
# This is very similar to what we do on kvm based eve to start container as a VM.
logmsg "Trying to install new external-boot-image"
# This import happens once per reboot
if ctr -a /run/containerd-user/containerd.sock image import /etc/external-boot-image.tar; then
eve_external_boot_img_tag=$(cat /run/eve-release)
eve_external_boot_img=docker.io/lfedge/eve-external-boot-image:"$eve_external_boot_img_tag"
import_tag=$(tar -xOf /etc/external-boot-image.tar manifest.json | jq -r '.[0].RepoTags[0]')
ctr -a /run/containerd-user/containerd.sock image tag "$import_tag" "$eve_external_boot_img"

logmsg "Successfully installed external-boot-image $import_tag as $eve_external_boot_img"
rm -f /etc/external-boot-image.tar
import_name_tag=$(tar -xOf /etc/external-boot-image.tar manifest.json | jq -r '.[0].RepoTags[0]')
import_name=$(echo "$import_name_tag" | cut -d ':' -f 1)
eve_external_boot_img_name="docker.io/lfedge/eve-external-boot-image"
if [ "$import_name" = "$eve_external_boot_img_name" ]; then
if /var/lib/k3s/bin/k3s ctr -a /run/containerd-user/containerd.sock image import /etc/external-boot-image.tar; then
eve_external_boot_img_tag=$(cat /run/eve-release)
eve_external_boot_img="${eve_external_boot_img_name}:${eve_external_boot_img_tag}"
if /var/lib/k3s/bin/k3s ctr -a /run/containerd-user/containerd.sock image tag "$import_name_tag" "$eve_external_boot_img"; then
logmsg "Successfully installed external-boot-image $import_name_tag as $eve_external_boot_img"
rm -f /etc/external-boot-image.tar
fi
fi
fi
fi
}
Expand Down Expand Up @@ -498,21 +512,41 @@ if [ ! -f /var/lib/all_components_initialized ]; then
fi

if [ ! -f /var/lib/longhorn_initialized ]; then
wait_for_item "longhorn"
logmsg "Installing longhorn version ${LONGHORN_VERSION}"
apply_longhorn_disk_config "$HOSTNAME"
lhCfgPath=/var/lib/lh-cfg-${LONGHORN_VERSION}.yaml
if [ ! -e $lhCfgPath ]; then
curl -k https://raw.githubusercontent.com/longhorn/longhorn/${LONGHORN_VERSION}/deploy/longhorn.yaml > "$lhCfgPath"
if [ ! -f /var/lib/longhorn_installing ]; then
wait_for_item "longhorn"
logmsg "Installing longhorn version ${LONGHORN_VERSION}"
apply_longhorn_disk_config "$HOSTNAME"
lhCfgPath=/var/lib/lh-cfg-${LONGHORN_VERSION}.yaml
if [ ! -e $lhCfgPath ]; then
curl -k https://raw.githubusercontent.com/longhorn/longhorn/${LONGHORN_VERSION}/deploy/longhorn.yaml > "$lhCfgPath"
fi
if ! grep -q 'create-default-disk-labeled-nodes: true' "$lhCfgPath"; then
sed -i '/ default-setting.yaml: |-/a\ create-default-disk-labeled-nodes: true' "$lhCfgPath"
fi
kubectl apply -f "$lhCfgPath"
touch /var/lib/longhorn_installing
fi
if ! grep -q 'create-default-disk-labeled-nodes: true' "$lhCfgPath"; then
sed -i '/ default-setting.yaml: |-/a\ create-default-disk-labeled-nodes: true' "$lhCfgPath"
lhStatus=$(kubectl -n longhorn-system get daemonsets -o json | jq '.items[].status | .numberReady==.desiredNumberScheduled' | tr -d '\n')
if [ "$lhStatus" = "truetruetrue" ]; then
logmsg "longhorn ready"
rm /var/lib/longhorn_installing
touch /var/lib/longhorn_initialized
fi
kubectl apply -f "$lhCfgPath"
touch /var/lib/longhorn_initialized
fi

if [ -f /var/lib/k3s_initialized ] && [ -f /var/lib/kubevirt_initialized ] && [ -f /var/lib/longhorn_initialized ]; then
#
# Descheduler
#
# One-time install of the Descheduler prerequisites: the upstream RBAC objects
# and the local policy ConfigMap. /var/lib/descheduler_initialized marks the
# step as done and is written only when BOTH applies succeed; otherwise a
# failed download or API error would be recorded as a completed install.
# NOTE(review): assumes this section is re-evaluated later (like the other
# *_initialized steps) so that a failed attempt is retried - confirm.
if [ ! -f /var/lib/descheduler_initialized ]; then
  wait_for_item "descheduler"
  logmsg "Installing Descheduler"
  DESCHEDULER_VERSION="v0.29.0"
  # The RBAC manifest is fetched over the network, so this apply can fail.
  if kubectl apply -f "https://raw.githubusercontent.com/kubernetes-sigs/descheduler/${DESCHEDULER_VERSION}/kubernetes/base/rbac.yaml" &&
    kubectl apply -f /etc/descheduler-policy-configmap.yaml; then
    touch /var/lib/descheduler_initialized
  else
    logmsg "Descheduler install failed, not marking as initialized"
  fi
fi

if [ -f /var/lib/k3s_initialized ] && [ -f /var/lib/kubevirt_initialized ] && [ -f /var/lib/longhorn_initialized ] && [ -f /var/lib/descheduler_initialized ]; then
logmsg "All components initialized"
touch /var/lib/all_components_initialized
fi
Expand Down
54 changes: 54 additions & 0 deletions pkg/kube/descheduler-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
---
# from: https://raw.githubusercontent.com/kubernetes-sigs/descheduler/${DESCHEDULER_VERSION}/kubernetes/job/job.yaml
#
# Job that runs the descheduler binary in a single pod (parallelism 1,
# completions 1, restartPolicy Never) in the kube-system namespace.
# The policy file is read from /policy-dir/policy.yaml, which is provided by
# mounting the descheduler-policy-configmap ConfigMap as a volume.
# The pod runs under the descheduler-sa service account.
apiVersion: batch/v1
kind: Job
metadata:
  name: descheduler-job
  namespace: kube-system
spec:
  parallelism: 1
  completions: 1
  template:
    metadata:
      name: descheduler-pod
    spec:
      priorityClassName: system-cluster-critical
      containers:
        - name: descheduler
          image: registry.k8s.io/descheduler/descheduler:v0.29.0
          volumeMounts:
            - mountPath: /policy-dir
              name: policy-volume
          command:
            - "/bin/descheduler"
          args:
            - "--policy-config-file"
            - "/policy-dir/policy.yaml"
            - "--v"
            - "3"
          resources:
            requests:
              cpu: "500m"
              memory: "256Mi"
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /healthz
              port: 10258
              scheme: HTTPS
            initialDelaySeconds: 3
            periodSeconds: 10
          # Run unprivileged with a read-only root filesystem and no capabilities.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            privileged: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
      restartPolicy: "Never"
      serviceAccountName: descheduler-sa
      volumes:
        - name: policy-volume
          configMap:
            name: descheduler-policy-configmap
24 changes: 24 additions & 0 deletions pkg/kube/descheduler-policy-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
# ConfigMap holding the descheduler policy as the key policy.yaml.
# The embedded policy enables only the RemovePodsViolatingNodeAffinity
# deschedule plugin, limited to the eve-kube-app namespace and to the
# preferredDuringSchedulingIgnoredDuringExecution affinity type.
apiVersion: v1
kind: ConfigMap
metadata:
  name: descheduler-policy-configmap
  namespace: kube-system
data:
  policy.yaml: |
    apiVersion: "descheduler/v1alpha2"
    kind: "DeschedulerPolicy"
    profiles:
      - name: ProfileName
        pluginConfig:
          - name: "RemovePodsViolatingNodeAffinity"
            args:
              namespaces:
                include:
                  - "eve-kube-app"
              nodeAffinityType:
                - "preferredDuringSchedulingIgnoredDuringExecution"
        plugins:
          deschedule:
            enabled:
              - "RemovePodsViolatingNodeAffinity"

0 comments on commit 0d2bd18

Please sign in to comment.