-
Notifications
You must be signed in to change notification settings - Fork 986
239 lines (210 loc) · 8.99 KB
/
chaos.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# Chaos-test workflow.
# Triggers: pushes to release branches (documentation-only changes are
# ignored), manual dispatch, and a daily schedule.
name: "chaos-test"

on:
  push:
    branches:
      - 'release-**'
    paths-ignore:
      - 'docs/**'
      - '**.md'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        # This flag gates the "Setup upterm session" step of the chaos-test
        # job, so the description names upterm (the action actually used).
        description: "Run the build with upterm debugging enabled"
        required: false
        default: false
  schedule:
    # Once a day at minute 0 of hour 0 (GitHub evaluates cron in UTC).
    - cron: '0 0 * * *'
jobs:
  # Builds JuiceFS, deploys it together with redis, minio and the JuiceFS CSI
  # driver into a kind cluster, starts the workload from dynamic.yaml, injects
  # one Chaos Mesh fault per matrix entry and then checks that every component
  # is still Running/Completed.
  chaos-test:
    runs-on: ubuntu-latest
    strategy:
      # Let every fault scenario run to the end even if another one fails.
      fail-fast: false
      matrix:
        # chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth", "redis-bandwidth", "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth", "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        # chaos: ["minio-io"]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - uses: actions/setup-go@v3
        with:
          go-version: 'oldstable'
          cache: true

      - name: Build
        run: |
          sudo .github/scripts/apt_install.sh musl-tools upx-ucl
          export STATIC=1
          make juicefs

      - name: Creating kind cluster
        # NOTE(review): the exact release tag of this action was unreadable in
        # the reviewed copy of the file; `v1` is the floating major tag.
        # Confirm and pin to the release that was originally used.
        uses: helm/kind-action@v1

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          helm version
          kubectl version

      # - name: Build And Load CSI Docker Image
      #   run: |
      #     echo GITHUB_REF is $GITHUB_REF
      #     echo GITHUB_SHA is $GITHUB_SHA
      #     helm repo add juicefs https://juicedata.github.io/charts/
      #     helm repo update
      #     APP_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $3}')
      #     echo APP_VERSION is $APP_VERSION
      #     docker build --build-arg GITHUB_REF=$GITHUB_REF --build-arg GITHUB_SHA=$GITHUB_SHA -f .github/scripts/chaos/juicefs-csi-driver.Dockerfile -t juicedata/juicefs-csi-driver:v$APP_VERSION .
      #     kind load docker-image juicedata/juicefs-csi-driver:v$APP_VERSION --name chart-testing

      - name: Build And Load CSI Docker Image
        # Builds the juicedata/mount:ci image from the checked-out tree and
        # loads it into the kind cluster named "chart-testing".
        run: |
          docker build -f .github/scripts/chaos/juicefs.Dockerfile -t juicedata/mount:ci .
          helm repo add juicefs https://juicedata.github.io/charts/
          helm repo update
          kind load docker-image juicedata/mount:ci --name chart-testing

      - name: Install JuiceFS CSI Driver
        # Installs the chart version taken from the first matching row of
        # `helm search repo` (second column of that row).
        run: |
          CHART_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $2}')
          echo CHART_VERSION is $CHART_VERSION
          helm install juicefs-csi-driver juicefs/juicefs-csi-driver -n kube-system --version $CHART_VERSION
          kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver

      - name: Deploy redis
        run: |
          kubectl apply -f .github/scripts/chaos/redis.yaml

      - name: Deploy minio
        run: |
          kubectl apply -f .github/scripts/chaos/minio.yaml

      - name: Mount Juicefs
        run: |
          kubectl apply -f .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/pvc.yaml

      - name: Start vdbench
        run: |
          kubectl apply -f .github/scripts/chaos/dynamic.yaml

      - name: Install Chaos Mesh
        run: |
          helm version
          kubectl version
          helm repo add chaos-mesh https://charts.chaos-mesh.org
          kubectl create ns chaos-mesh
          helm install chaos-mesh chaos-mesh/chaos-mesh -n=chaos-mesh --version 2.5.1 \
            --set chaosDaemon.runtime=containerd \
            --set chaosDaemon.socketPath=/run/containerd/containerd.sock \
            --set controllerManager.replicaCount=1
          echo "wait pod status to running"
          # Poll up to 120 times, one second apart, until the number of
          # Running pods equals the number of listed pods (line count minus
          # the header row). The loop is best-effort: when the limit is
          # reached the step simply continues.
          for ((k=0; k<120; k++)); do
            kubectl get pods --namespace chaos-mesh -l app.kubernetes.io/instance=chaos-mesh > pods.status
            cat pods.status
            run_num=$(grep Running pods.status | wc -l)
            pod_num=$(( $(wc -l < pods.status) - 1 ))
            if [ "$run_num" -eq "$pod_num" ]; then
              break
            fi
            sleep 1
          done

      - name: Run chaos mesh action
        # Uncomments the list entry for this matrix scenario in workflow.yaml
        # ("# - <name>" becomes "- <name>") and applies the result.
        run: |
          chaos=${{matrix.chaos}}
          sed -i "s/# - $chaos/- $chaos/g" .github/scripts/chaos/workflow.yaml
          cat .github/scripts/chaos/workflow.yaml
          kubectl apply -f .github/scripts/chaos/workflow.yaml

      - name: Verify
        run: |
          # Wait up to 1200 iterations (one second apart) for a dynamic-ce pod
          # to be listed as Completed; progress is reported every 10th pass.
          for i in {1..1200}; do
            if kubectl get pods --all-namespaces | grep dynamic-ce | grep -i "Completed"; then
              echo "dynamic-ce is completed in $i seconds"
              break
            else
              if [ $((i % 10)) -eq 0 ]; then
                echo "dynamic-ce is not completed in $i seconds"
              fi
              sleep 1
            fi
          done
          kubectl get pods --all-namespaces
          apps=("dynamic-ce" "juicefs-csi-node" "juicefs-csi-controller" "juicefs-chart-testing-control-plane-pvc" "redis" "minio")
          for app in "${apps[@]}"; do
            echo app is $app
            # The pipeline is tested directly by `if`: the default run shell
            # is `bash -e`, which would abort on a failing pipeline before a
            # separate `$?` check could print the diagnostic below.
            if ! kubectl get pods --all-namespaces | grep "$app" | grep -i "Running\|Completed"; then
              echo status of $app is not expected.
              exit 1
            fi
          done

      # The remaining steps run even after a failure so that the logs of every
      # component are available in the job output.
      - name: Check csi controller log
        if: ${{ always() }}
        run: |
          kubectl describe pvc dynamic-ce
          kubectl -n kube-system get po -l app=juicefs-csi-controller
          kubectl -n kube-system logs juicefs-csi-controller-0 juicefs-plugin

      - name: Check csi node log
        if: ${{ always() }}
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-csi-node)
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl -n kube-system logs $POD_NAME -c juicefs-plugin > csi_node.log
          cat csi_node.log
          # grep -i "error" csi_node.log && exit 1 || true

      - name: Check mount point pod
        if: ${{ always() }}
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-chart-testing-control-plane-pvc)
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > juicefs.log
          cat juicefs.log
          # Fail the step when the mount pod log contains a <FATAL> entry.
          grep "<FATAL>:" juicefs.log && exit 1 || true

      - name: Check vdbench log
        if: ${{ always() }}
        run: |
          POD_NAME=$(kubectl get pods -n default -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep dynamic-ce )
          echo POD_NAME is $POD_NAME
          kubectl -n default describe po $POD_NAME
          kubectl logs -n default $POD_NAME > vdbench.log
          cat vdbench.log
          # grep -i "error" vdbench.log && exit 1 || true

      - name: Check Redis log
        if: ${{ always() }}
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep redis )
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > redis.log
          cat redis.log
          # grep -i "error" redis.log && exit 1 || true

      - name: Check Minio log
        if: ${{ always() }}
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep minio )
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > minio.log
          cat minio.log
          # grep -i "error" minio.log && exit 1 || true

      - name: Setup upterm session
        # Only for failed, manually dispatched runs that set the debug input.
        if: ${{ failure() && github.event_name == 'workflow_dispatch' && github.event.inputs.debug == 'true' }}
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  # Aggregation job: runs after every chaos-test matrix entry has finished,
  # whatever the outcome, and fails when WORKFLOW_CONCLUSION is 'failure'.
  success-all-test:
    runs-on: ubuntu-latest
    needs: [chaos-test]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        # Manually dispatched runs do not notify the channel.
        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: ${{ success() }}
        run: echo "All Done"