-
Notifications
You must be signed in to change notification settings - Fork 4
/
start-akash.sh
176 lines (150 loc) · 4.72 KB
/
start-akash.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/bin/bash
# Every relative path used below (variables, gpu-test-pod.yaml,
# ./run-helm-k3s.sh, the *-bootstrap.sh installers) is resolved from this
# directory.
cd /home/akash
# GPU section: only entered when a variables file already exists. This `if`
# stays open across the function definitions that follow and is closed just
# before the "#End NVIDIA" marker.
if [ -f variables ]; then
# Load the saved settings into this shell.
source /home/akash/variables
# Exported so the kubectl/helm commands in this section inherit it;
# presumably this is the kubeconfig written by k3s -- confirm on the target host.
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
#######################################
# Install the NVIDIA GPU operator with Helm and record that GPU support has
# been configured.
# Globals:   none read; defines the function working_old as a side effect.
# Arguments: none
# Outputs:   progress messages and `kubectl get pods` output on stdout,
#            an error message on stderr when a Helm step fails;
#            appends "GPU_ENABLED=true" to ./variables on success.
# Returns:   0 on success, 1 when any Helm step fails (the marker is then
#            NOT written, so the setup is attempted again on the next run).
#######################################
configure_gpu() {
  echo "Detected GPU but not set up. Starting configuration..."

  # Legacy installation path based on the standalone device plugin. It is
  # only defined here and is not invoked by configure_gpu itself.
  working_old() {
    # Add Helm repositories
    helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
    helm repo update

    # Create NVIDIA RuntimeClass (metadata.name must be nested under metadata)
    cat > /home/akash/gpu-nvidia-runtime-class.yaml <<'EOF'
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
  name: nvidia
handler: nvidia
EOF
    kubectl apply -f /home/akash/gpu-nvidia-runtime-class.yaml

    # Install NVIDIA Device Plugin
    helm upgrade -i nvdp nvdp/nvidia-device-plugin \
      --namespace nvidia-device-plugin \
      --create-namespace \
      --set runtimeClassName="nvidia"
  }

  # Stop at the first failing Helm step instead of silently carrying on and
  # marking the GPU as configured.
  if ! helm repo add nvidia https://helm.ngc.nvidia.com/nvidia \
    || ! helm repo update \
    || ! helm install --wait --generate-name --create-namespace \
      --namespace nvidia-device-plugin nvidia/gpu-operator \
      --set driver.enabled=false \
      --set toolkit.enabled=false \
      --set migManager.enabled=false; then
    echo "GPU operator installation failed - GPU_ENABLED was not recorded" >&2
    return 1
  fi

  echo "Waiting 60 seconds for the GPU to settle..."
  sleep 60
  kubectl get pods -A -o wide

  #Required for GPUs on Testnet
  # kubectl label node akash-node1 akash.network/capabilities.gpu.vendor.nvidia.model.1080=true
  # kubectl label node akash-node1 akash.network/capabilities.gpu.vendor.nvidia.model.3080ti=true

  # Set GPU_ENABLED to true
  echo "GPU_ENABLED=true" >> variables
}
#######################################
# Write a manifest for a one-off CUDA nbody benchmark pod, run it through
# `k3s kubectl`, print its logs and delete it again.
# Globals:   none
# Arguments: none
# Outputs:   writes ./gpu-test-pod.yaml; pod listing and benchmark logs on
#            stdout; an error message on stderr when the apply fails.
# Returns:   1 when the manifest cannot be applied, otherwise the status of
#            the final `k3s kubectl delete`.
#######################################
create_test_pod() {
  # The delimiter is quoted so the manifest is written literally. YAML is
  # indentation-sensitive: name/namespace belong under metadata, containers
  # under spec, and resources/env under the container entry.
  cat > gpu-test-pod.yaml <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: nbody-gpu-benchmark
  namespace: default
spec:
  restartPolicy: OnFailure
  runtimeClassName: nvidia
  containers:
  - name: cuda-container
    image: nvcr.io/nvidia/k8s/cuda-sample:nbody
    args: ["nbody", "-gpu", "-benchmark"]
    resources:
      limits:
        nvidia.com/gpu: 1
    env:
    - name: NVIDIA_VISIBLE_DEVICES
      value: all
    - name: NVIDIA_DRIVER_CAPABILITIES
      value: all
EOF

  # Nothing to wait for, inspect or delete if the pod was never created.
  if ! k3s kubectl apply -f gpu-test-pod.yaml; then
    echo "Failed to apply gpu-test-pod.yaml - skipping GPU test" >&2
    return 1
  fi

  echo "Waiting 60 seconds for the test pod to start..."
  sleep 60
  k3s kubectl get pods -A -o wide
  k3s kubectl logs nbody-gpu-benchmark
  k3s kubectl delete pod nbody-gpu-benchmark
}
# GPU setup runs only when lspci lists an NVIDIA device and the variables
# file contains neither the GPU_ENABLED=true nor the CLIENT_NODE=true marker.
if lspci | grep -q NVIDIA \
  && ! grep -q "GPU_ENABLED=true" variables \
  && ! grep -q "CLIENT_NODE=true" variables; then
  # Poll every 10 seconds: take the STATUS column of the last kube-system pod
  # whose line mentions 'cilium' and keep waiting until it reads "Running".
  until
    CILIUM_POD_STATUS=$(kubectl get pods -n kube-system | grep 'cilium' | awk '{print $3}' | tail -n1)
    [[ $CILIUM_POD_STATUS == "Running" ]]
  do
    echo "The 'cilium' pod is not Running yet. Waiting..."
    sleep 10
  done
  echo "The 'cilium' pod is Running."

  # Networking is up: run the Helm setup script as the akash user, then
  # configure the GPU and run the benchmark pod.
  sudo -u akash ./run-helm-k3s.sh
  configure_gpu
  create_test_pod
fi
fi
#End NVIDIA
#######################################
# Remove installer scripts left behind in the current directory by an
# earlier run.
# Each known installer is tested on its own: a single `[ -f glob ]` test
# breaks as soon as the glob matches more than one file, which would leave
# the stale installers in place.
# Globals:   none
# Arguments: none
# Outputs:   one notice on stdout when at least one installer is found.
# Returns:   0 always; finding nothing to clean up is not an error.
#######################################
cleanup_bootstrap() {
  local installer
  local announced=false

  for installer in \
    ./microk8s-bootstrap.sh \
    ./k3s-bootstrap.sh \
    ./kubespray-bootstrap.sh; do
    [[ -f "$installer" ]] || continue
    if [[ "$announced" == false ]]; then
      echo "Found old installers - cleaning up"
      announced=true
    fi
    rm -f -- "$installer"
  done

  return 0
}
#######################################
# Download the installer script for the chosen Kubernetes install method
# into the current directory and run it with sudo.
# Globals:   none
# Arguments: $1 - install method: kubespray, microk8s or k3s
# Outputs:   status messages on stdout, download errors on stderr.
# Returns:   exits the shell with status 1 on an unknown method;
#            returns 1 when the installer cannot be downloaded or made
#            executable; otherwise the exit status of the installer.
#######################################
run_bootstrap() {
  local method=$1
  local bootstrap_script

  case "$method" in
    kubespray | microk8s | k3s)
      bootstrap_script="${method}-bootstrap.sh"
      ;;
    *)
      echo "Invalid method: $method"
      exit 1
      ;;
  esac

  # -O pins the output file name. Without it wget saves to "<name>.1" when
  # the file already exists, and the stale copy would be the one executed.
  if ! wget -q --no-cache -O "$bootstrap_script" \
    "https://raw.githubusercontent.com/cryptoandcoffee/akashos/main/$bootstrap_script"; then
    # wget -O creates the file before downloading; drop the empty leftover.
    rm -f -- "$bootstrap_script"
    echo "Failed to download $bootstrap_script" >&2
    return 1
  fi

  chmod +x "$bootstrap_script" || return 1
  echo "No setup detected! Enter the default password 'akash' to start the Akash installer"
  sudo "./$bootstrap_script"
}
#######################################
# Entry point: clean up old installers, then either start the interactive
# installer (no variables file yet) or load the existing configuration.
# Globals:   KUBECONFIG (exported when SETUP_COMPLETE is true), plus
#            whatever ./variables defines when it is sourced.
# Arguments: none
# Outputs:   prompts and status messages.
# Returns:   1 when input ends before an install method is confirmed,
#            0 otherwise (run_bootstrap may exit the shell on its own).
#######################################
main() {
  local method=""
  local choice=""

  cleanup_bootstrap

  if [ ! -f variables ]; then
    while true; do
      # A failed read with nothing captured means stdin hit EOF; stop
      # instead of re-prompting forever.
      if ! read -r -p "Which Kubernetes install method would you like to use (k3s/microk8s/kubespray)? (k3s): " method \
        && [[ -z "$method" ]]; then
        echo "No input available - aborting installer selection" >&2
        return 1
      fi
      # Pressing Enter selects the default advertised in the prompt.
      method=${method:-k3s}

      if ! read -r -p "Are you sure you want to install with the $method method? (y/n): " choice \
        && [[ -z "$choice" ]]; then
        echo "No input available - aborting installer selection" >&2
        return 1
      fi

      case "$choice" in
        [Yy])
          run_bootstrap "$method"
          break
          ;;
        [Nn])
          echo "Please try again with k3s if unsure"
          sleep 3
          ;;
        *)
          echo "Invalid entry, please try again with Y or N"
          sleep 3
          ;;
      esac
    done
  else
    # The explicit ./ keeps `source` from searching PATH for a file named
    # "variables" before it looks in the current directory.
    source ./variables
    if [[ $SETUP_COMPLETE == true ]]; then
      export KUBECONFIG=/home/akash/.kube/kubeconfig
      echo "Variables file detected - Setup complete."
    fi
  fi
}
# Execute the main function
main