chaos-test #1023
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Chaos test workflow: exercises JuiceFS under injected faults (see the
# `chaos` matrix of the chaos-test job).
name: "chaos-test"

# Triggers:
#   * push / pull_request on main and release-** branches, but only when a
#     file named chaos.yml is part of the change;
#   * manual dispatch, with an optional `debug` flag that the job's
#     "Setup upterm session" step checks on failure;
#   * a daily schedule.
on:
  push:
    branches:
      - 'release-**'
      - 'main'
    paths:
      - '**/chaos.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/chaos.yml'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    # Every day at 20:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 20 * * *'
jobs:
  # One run per matrix entry: build JuiceFS, bring up a kind cluster with the
  # JuiceFS CSI driver, redis and minio, start the workload defined in
  # dynamic.yaml, inject the selected Chaos Mesh fault, then verify pod
  # status and dump logs.
  chaos-test:
    timeout-minutes: 60
    # The ubuntu-20.04 hosted-runner image has been retired by GitHub, so the
    # job is pinned to the next LTS image instead.
    runs-on: ubuntu-22.04
    strategy:
      # Let every chaos scenario finish even if another one fails.
      fail-fast: false
      matrix:
        # chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth", "redis-bandwidth", "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth", "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        # chaos: ["minio-io"]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - uses: actions/setup-go@v3
        with:
          go-version: 'oldstable'
          cache: true

      - name: Build
        timeout-minutes: 10
        run: |
          sudo .github/scripts/apt_install.sh musl-tools upx-ucl
          export STATIC=1
          make juicefs

      - name: Creating kind cluster
        # NOTE(review): the pinned tag was lost in this copy of the file (it
        # was mangled into "[email protected]"); `v1` is a stand-in, confirm
        # the exact release the project wants to pin.
        uses: helm/kind-action@v1

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          helm version
          kubectl version

      # - name: Build And Load CSI Docker Image
      #   run: |
      #     echo GITHUB_REF is $GITHUB_REF
      #     echo GITHUB_SHA is $GITHUB_SHA
      #     helm repo add juicefs https://juicedata.github.io/charts/
      #     helm repo update
      #     APP_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $3}')
      #     echo APP_VERSION is $APP_VERSION
      #     docker build --build-arg GITHUB_REF=$GITHUB_REF --build-arg GITHUB_SHA=$GITHUB_SHA -f .github/scripts/chaos/juicefs-csi-driver.Dockerfile -t juicedata/juicefs-csi-driver:v$APP_VERSION .
      #     kind load docker-image juicedata/juicefs-csi-driver:v$APP_VERSION --name chart-testing

      - name: Build And Load CSI Docker Image
        run: |
          # Tag the mount image with the version reported by the freshly
          # built binary (third field of `juicefs version`, text before '-').
          version=$(./juicefs version | awk '{print $3}' | cut -d '-' -f1)
          docker build -f .github/scripts/chaos/juicefs.Dockerfile -t "juicedata/mount:ce-v${version}" .
          helm repo add juicefs https://juicedata.github.io/charts/
          helm repo update
          kind load docker-image "juicedata/mount:ce-v${version}" --name chart-testing

      - name: Install JuiceFS CSI Driver
        run: |
          # Use the first chart version listed by `helm search`.
          CHART_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $2}')
          echo "CHART_VERSION is $CHART_VERSION"
          helm install juicefs-csi-driver juicefs/juicefs-csi-driver -n kube-system --version "$CHART_VERSION"
          kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver

      - name: Deploy redis
        run: |
          kubectl apply -f .github/scripts/chaos/redis.yaml

      - name: Deploy minio
        run: |
          kubectl apply -f .github/scripts/chaos/minio.yaml

      - name: Mount Juicefs
        run: |
          # Point sc.yaml at the image tag built in the earlier step.
          version=$(./juicefs version | awk '{print $3}' | cut -d '-' -f1)
          sed -i "s/mount:ci/mount:ce-v$version/" .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/pvc.yaml

      - name: Start vdbench
        run: |
          kubectl apply -f .github/scripts/chaos/dynamic.yaml

      - name: Install Chaos Mesh
        run: |
          helm version
          kubectl version
          helm repo add chaos-mesh https://charts.chaos-mesh.org
          kubectl create ns chaos-mesh
          helm install chaos-mesh chaos-mesh/chaos-mesh -n=chaos-mesh --version 2.5.1 \
            --set chaosDaemon.runtime=containerd \
            --set chaosDaemon.socketPath=/run/containerd/containerd.sock \
            --set controllerManager.replicaCount=1
          echo "wait pod status to running"
          # Poll once per second, at most 120 times, until every listed pod
          # shows Running. The loop is best-effort: it falls through without
          # failing the step if the pods never get there.
          for ((k=0; k<120; k++)); do
            kubectl get pods --namespace chaos-mesh -l app.kubernetes.io/instance=chaos-mesh > pods.status
            cat pods.status
            # `grep | wc -l` rather than `grep -c`: the pipeline's status is
            # wc's, so zero matches does not abort the script under `bash -e`.
            run_num=$(grep Running pods.status | wc -l)
            # Total line count minus the header row.
            pod_num=$(( $(wc -l < pods.status) - 1 ))
            if [ "$run_num" -eq "$pod_num" ]; then
              break
            fi
            sleep 1
          done

      - name: Run chaos mesh action
        run: |
          chaos="${{ matrix.chaos }}"
          # Un-comment the entry for this matrix scenario in workflow.yaml.
          sed -i "s/# - $chaos/- $chaos/g" .github/scripts/chaos/workflow.yaml
          cat .github/scripts/chaos/workflow.yaml
          kubectl apply -f .github/scripts/chaos/workflow.yaml

      - name: Verify
        run: |
          # Wait (up to 1200 one-second polls) for the dynamic-ce pod to
          # report Completed; progress is logged every 10th poll.
          for i in {1..1200}; do
            if kubectl get pods --all-namespaces | grep dynamic-ce | grep -i "Completed"; then
              echo "dynamic-ce is completed in $i seconds"
              break
            else
              if [ $((i % 10)) -eq 0 ]; then
                echo "dynamic-ce is not completed in $i seconds"
              fi
              sleep 1
            fi
          done
          kubectl get pods --all-namespaces
          apps=("dynamic-ce" "juicefs-csi-node" "juicefs-csi-controller" "juicefs-chart-testing-control-plane-pvc" "redis" "minio")
          for app in "${apps[@]}"; do
            echo "app is $app"
            # The pipeline is tested directly in the `if`: as a standalone
            # command its failure would abort the script under `bash -e`
            # before the message below could be printed.
            if ! kubectl get pods --all-namespaces | grep "$app" | grep -i "Running\|Completed"; then
              echo "status of $app is not expected."
              exit 1
            fi
          done

      - name: Check mount pod
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-chart-testing-control-plane-pvc)
          echo "POD_NAME is $POD_NAME"
          # POD_NAME may hold several names; it is split on whitespace on purpose.
          for pod in $POD_NAME; do
            kubectl -n kube-system describe po "$pod"
            kubectl logs -n kube-system "$pod" > juicefs.log
            cat juicefs.log
            # Any <FATAL> entry in a mount pod log fails the step.
            if grep "<FATAL>:" juicefs.log; then
              exit 1
            fi
          done

      - name: Mount pod upgrade
        timeout-minutes: 5
        run: |
          chaos="${{ matrix.chaos }}"
          # Scenarios for which the upgrade check is skipped.
          skip_conditions=("minio-io")
          if [[ "${skip_conditions[*]}" =~ "$chaos" ]]; then
            echo "skip mount pod upgrade"
            exit 0
          else
            CSI_POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-csi-node)
            PVC_POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-chart-testing-control-plane-pvc)
            # First upgrade must report SUCCESS in its output.
            kubectl exec $CSI_POD_NAME -n kube-system -- juicefs-csi-driver upgrade $PVC_POD_NAME 2>&1 | tee upgrade.log
            sleep 5
            if ! grep "SUCCESS" upgrade.log; then
              exit 1
            fi
            rm upgrade.log
            # Second upgrade, with --restart; its outcome is not checked.
            kubectl exec $CSI_POD_NAME -n kube-system -- juicefs-csi-driver upgrade $PVC_POD_NAME --restart 2>&1 | tee upgrade.log || true
            sleep 5
          fi
          kubectl delete -f .github/scripts/chaos/workflow.yaml

      - name: Check csi controller log
        if: always()
        run: |
          kubectl describe pvc dynamic-ce
          kubectl -n kube-system get po -l app=juicefs-csi-controller
          kubectl -n kube-system logs juicefs-csi-controller-0 juicefs-plugin

      - name: Check csi node log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep juicefs-csi-node)
          echo "POD_NAME is $POD_NAME"
          kubectl -n kube-system describe po $POD_NAME
          kubectl -n kube-system logs $POD_NAME -c juicefs-plugin > csi_node.log
          cat csi_node.log
          # grep -i "error" csi_node.log && exit 1 || true

      - name: Check mount point pod
        if: always()
        run: |
          # Unlike "Check mount pod", only pods currently in Running state
          # are inspected here.
          POD_NAME=$(kubectl get pods -n kube-system | grep juicefs-chart-testing-control-plane-pvc | grep Running | awk '{print $1}')
          echo "POD_NAME is $POD_NAME"
          for pod in $POD_NAME; do
            kubectl -n kube-system describe po "$pod"
            kubectl logs -n kube-system "$pod" > juicefs.log
            cat juicefs.log
            if grep "<FATAL>:" juicefs.log; then
              exit 1
            fi
          done

      - name: Check vdbench log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n default -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep dynamic-ce )
          echo "POD_NAME is $POD_NAME"
          kubectl -n default describe po $POD_NAME
          kubectl logs -n default $POD_NAME > vdbench.log
          cat vdbench.log
          # grep -i "error" vdbench.log && exit 1 || true

      - name: Check Redis log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep redis )
          echo "POD_NAME is $POD_NAME"
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > redis.log
          cat redis.log
          # grep -i "error" redis.log && exit 1 || true

      - name: Check Minio log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep minio )
          echo "POD_NAME is $POD_NAME"
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > minio.log
          cat minio.log
          # grep -i "error" minio.log && exit 1 || true

      - name: Setup upterm session
        # Only on failure, and only when debugging was requested on manual
        # dispatch or the run is a re-run attempt.
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  # Aggregation job: runs after every chaos-test matrix entry regardless of
  # their outcome, fails if WORKFLOW_CONCLUSION is 'failure', and sends a
  # Slack notification on failure unless the run was started manually.
  success-all-test:
    runs-on: ubuntu-latest
    needs: [chaos-test]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3

      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"