-
Notifications
You must be signed in to change notification settings - Fork 807
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add attachment limit scripts to hack/cluster-debugging-scripts
- Loading branch information
1 parent
b894ce5
commit 28cc128
Showing
6 changed files
with
414 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Cluster Debugging Scripts | ||
|
||
This folder contains a collection of scripts to help debug clusters. | ||
|
||
## FAQ | ||
|
||
### How can I validate that the aws-ebs-csi-driver correctly makes use of all available attachment slots for instance type X? | ||
|
||
Answer: Perform the following steps (which will create a nodegroup, count the Block Device Mappings + ENIs for the underlying instance, and deploy pods with EBS PVs until the script finds the maximum number of volumes the aws-ebs-csi-driver can attach to the instance). | ||
|
||
``` | ||
export CLUSTER_NAME="devcluster" | ||
export NODEGROUP_NAME="ng-attachment-limit-test" | ||
export INSTANCE_TYPE="m7g.large" | ||
eksctl create nodegroup -c "$CLUSTER_NAME" --nodes 1 -t "$INSTANCE_TYPE" -n "$NODEGROUP_NAME" | ||
./get-attachment-breakdown "$NODEGROUP_NAME" | ||
eksctl delete nodegroup -c "$CLUSTER_NAME" -n "$NODEGROUP_NAME" | ||
``` | ||
|
||
By the end of the script, you should see an output similar to this: | ||
``` | ||
Attachments for ng-f3ecdf71 | ||
BlockDeviceMappings ENIs Available-Attachment-Slots(Validated-by-aws-ebs-csi-driver) | ||
1 2 25 | ||
``` | ||
|
||
## Scripts | ||
|
||
get-attachment-breakdown: Find the maximum number of volumes that can be attached to a node of the specified nodegroup. Additionally, log how many BlockDeviceMappings and ENIs are attached to the instance of the specified nodegroup. | ||
|
||
Examples | ||
``` | ||
./get-attachment-breakdown "ng-f3ecdf71" | ||
MIN_VOLUME_GUESS=20 MAX_VOLUME_GUESS=40 POD_TIMEOUT_SECONDS=120 ./get-attachment-breakdown "ng-f3ecdf71" | ||
``` | ||
|
||
find-attachment-limit: Find the maximum number of volumes the aws-ebs-csi-driver can attach to a node. | ||
|
||
Examples: | ||
``` | ||
./find-attachment-limit 'some.node.affinity.key:value' | ||
./find-attachment-limit 'eks.amazonaws.com/nodegroup:test-nodegroup' | ||
./find-attachment-limit 'node.kubernetes.io/instance-type:m5.large' | ||
MIN_VOLUME_GUESS=12 MAX_VOLUME_GUESS=30 POD_TIMEOUT_SECONDS=60 ./find-attachment-limit 'node.kubernetes.io/instance-type:m5.large' | ||
``` | ||
|
||
generate_example_manifest.go: Generate a yaml file containing a pod associated with a specified amount of PVCs based off of the template file `device_slot_test.tmpl` | ||
|
||
Example: | ||
``` | ||
go run "generate_example_manifest.go" --node-affinity "some.label:value" --volume-count "22" --test-pod-name "test-pod" | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Go text/template rendered by generate_example_manifest.go.
# Produces one Pod that mounts len(.Volumes) PVCs, one PersistentVolumeClaim
# per volume, and the StorageClass those claims reference.
apiVersion: v1
kind: Pod
metadata:
  name: {{ .PodName }}
spec:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: {{ .NodeAffinityKey }}
            operator: In
            values:
            - {{ .NodeAffinityValue }}
  containers:
  - name: device-limit-tester-{{ len .Volumes }}-volumes
    image: centos
    command: ["/bin/sh"]
    args: ["-c", "while true; do echo hello; sleep 10;done"]
    volumeMounts:
    {{- range $index, $value := .Volumes }}
    - name: persistent-storage-{{ $index }}
      mountPath: /data-{{ $index }}
    {{- end }}
  volumes:
  {{- range $index, $value := .Volumes }}
  - name: persistent-storage-{{ $index }}
    persistentVolumeClaim:
      claimName: ebs-claim-{{ $index }}
  {{- end }}
---
{{- range $index, $value := .Volumes }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ebs-claim-{{ $index }}
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: ebs-sc
  resources:
    requests:
      storage: 4Gi
---
{{- end }}
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ebs-sc
provisioner: ebs.csi.aws.com
volumeBindingMode: WaitForFirstConsumer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#!/bin/bash | ||
# Copyright 2023 The Kubernetes Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# --- | ||
|
||
# Abort on command failure (-e), on use of an unset variable (-u), and when
# any stage of a pipeline fails (pipefail).
set -euo pipefail

# --- Environment Variables
# Each setting may be overridden from the caller's environment; the value
# after ':=' is the default used when the variable is unset or empty.
export MIN_VOLUME_GUESS=${MIN_VOLUME_GUESS:=0}
export MAX_VOLUME_GUESS=${MAX_VOLUME_GUESS:=130}
export POD_TIMEOUT_SECONDS=${POD_TIMEOUT_SECONDS:=90}
export EXTRA_LOGS_FILEPATH=${EXTRA_LOGS_FILEPATH:="/dev/null"}

export TEST_POD_NAME=${TEST_POD_NAME:="attachment-limit-test-pod"}

export SCRIPT_PATH ROOT_DIRECTORY GENERATE_MANIFEST_SCRIPT_FILEPATH
# The inner substitution is quoted so that a script path containing spaces is
# not word-split before being handed to dirname.
SCRIPT_PATH=$(dirname "$(realpath "$0")")
ROOT_DIRECTORY="$SCRIPT_PATH/../.."
GENERATE_MANIFEST_SCRIPT_FILEPATH="$ROOT_DIRECTORY/hack/cluster-debugging-scripts/generate_example_manifest.go"
|
||
# --- Script Tools | ||
# Writes one timestamped "[INFO]" line to stderr.
# All arguments are joined into a single message.
log() {
  local message="${*}"
  printf "%s [INFO] - %s\n" "$(date +"%Y-%m-%d %H:%M:%S")" "$message" >&2
}
|
||
# Verifies that every external command the script relies on is on PATH.
# Logs the first missing command and exits with status 1.
check_dependencies() {
  # 'local -r' is the read-only form; 'local readonly name=...' would instead
  # declare an extra local variable literally named "readonly".
  local -r dependencies=("kubectl" "go")
  local cmd

  for cmd in "${dependencies[@]}"; do
    if ! command -v "${cmd}" &>/dev/null; then
      log "${cmd} could not be found, please install it."
      exit 1
    fi
  done
}
|
||
# --- Script | ||
# Prints the command-line help to stdout and exits with status 1.
usage () {
  cat <<EOF
Usage: $0 [NODE_AFFINITY]
Examples:
$0 'eks.amazonaws.com/nodegroup:test-nodegroup'
$0 'node.kubernetes.io/instance-type:m5.large'
You can also override the following environment variable defaults: MIN_VOLUME_GUESS=0 MAX_VOLUME_GUESS=130 POD_TIMEOUT_SECONDS=90 EXTRA_LOGS_FILEPATH='/dev/null'
MIN_VOLUME_GUESS=12 MAX_VOLUME_GUESS=30 POD_TIMEOUT_SECONDS=60 $0 'node.kubernetes.io/instance-type:m5.large'
EOF
  exit 1
}
|
||
# Expects exactly one positional argument, the node affinity 'key:value' pair,
# and exports it as NODE_AFFINITY_KEY_VALUE_PAIR. Any other argument count
# prints the usage text and exits.
parse_args () {
  if (( $# != 1 )); then
    usage
  fi

  export NODE_AFFINITY_KEY_VALUE_PAIR=$1
}
|
||
# Deletes the Kubernetes objects described by $MANIFEST_FILE and removes the
# temporary manifest. Also installed as the EXIT trap by main.
# Returns the exit status of 'kubectl delete' (0 when there is nothing to do).
cleanup() {
  # MANIFEST_FILE is exported before it is ever assigned, so default it to
  # empty here; a bare "$MANIFEST_FILE" would abort under 'set -u'.
  local manifest="${MANIFEST_FILE:-}"
  if [[ -z "$manifest" || ! -f "$manifest" ]]; then
    return 0
  fi

  log "Deleting k8s objects associated with manifest $manifest"
  local status=0
  # Append (>>) so earlier output in EXTRA_LOGS_FILEPATH is not truncated.
  # --ignore-not-found keeps a partially created manifest from failing cleanup.
  kubectl delete --ignore-not-found -f "$manifest" >> "$EXTRA_LOGS_FILEPATH" 2>&1 || status=$?
  # Remove the temp file even when the delete failed, then report the failure.
  rm -f "$manifest"
  return "$status"
}
|
||
# Renders a manifest with $1 PVCs, creates it in the cluster, and waits up to
# POD_TIMEOUT_SECONDS for the test pod to become ready.
#
# Arguments:
#   $1 - number of volumes (PVCs) to attach to the test pod
# Globals written:
#   VOLUME_COUNT, MANIFEST_FILE
#   WAS_POD_CREATED - exit status of 'kubectl wait' (0 means the pod got ready)
# The created objects and the temporary manifest are removed before returning.
deploy_manifest() {
  VOLUME_COUNT=$1
  log "Attempting to deploy pod with $VOLUME_COUNT PVCs on node with label '$NODE_AFFINITY_KEY_VALUE_PAIR'"

  # Create pod manifest for the current guess
  MANIFEST_FILE=$(mktemp)
  go run "$GENERATE_MANIFEST_SCRIPT_FILEPATH" --node-affinity "$NODE_AFFINITY_KEY_VALUE_PAIR" --volume-count "$VOLUME_COUNT" --test-pod-name "$TEST_POD_NAME" > "$MANIFEST_FILE"

  # Deploy pod to node. Output is appended so one log file can hold every attempt.
  log "Creating k8s objects associated with manifest $MANIFEST_FILE"
  kubectl create -f "$MANIFEST_FILE" >> "$EXTRA_LOGS_FILEPATH"

  # Watch for success vs error code. '|| WAS_POD_CREATED=$?' records a timeout
  # without tripping 'set -e', so errexit does not need to be toggled.
  log "Waiting $POD_TIMEOUT_SECONDS seconds for 'pod/$TEST_POD_NAME' to reach condition 'ready'"
  WAS_POD_CREATED=0
  kubectl wait --for=condition=ready --timeout="${POD_TIMEOUT_SECONDS}s" pod/"$TEST_POD_NAME" >> "$EXTRA_LOGS_FILEPATH" 2>&1 || WAS_POD_CREATED=$?
  if [[ $WAS_POD_CREATED == 0 ]]; then
    log "Pod with $VOLUME_COUNT PVCs successfully deployed"
  else
    log "Pod with $VOLUME_COUNT PVCs did not successfully deploy"
  fi

  cleanup
}
|
||
# Binary-searches between MIN_VOLUME_GUESS and MAX_VOLUME_GUESS for the largest
# number of PVCs with which the test pod still becomes ready.
#
# Arguments:
#   $1 - node affinity 'key:value' pair (see usage)
# Globals written:
#   MAX_ATTACHED_VOLUMES - the volume count found
# Returns:
#   MAX_ATTACHED_VOLUMES as the function's exit status. Because 'main' is the
#   last command, the script exits with that value, so a non-zero exit status
#   does NOT by itself indicate failure. Shell exit statuses are 8-bit, so
#   counts above 255 cannot be represented this way.
main() {
  check_dependencies

  parse_args "$@"

  export WAS_POD_CREATED=0 # 0 is true in bash
  export MANIFEST_FILE
  trap 'cleanup' EXIT

  local min=$MIN_VOLUME_GUESS
  local max=$MAX_VOLUME_GUESS
  local current_volume_count
  # The search only ever tests counts above 'min', so the lower bound itself
  # is trusted rather than verified. Track whether anything actually worked.
  local any_success=false

  while (( min < max )); do
    # Compute the mean between min and max, rounded up, so that the loop
    # always makes progress when max == min + 1.
    current_volume_count=$(( (min + max + 1) / 2 ))
    deploy_manifest "$current_volume_count"
    if [[ $WAS_POD_CREATED == 0 ]]; then # 0 is true in bash
      min=$current_volume_count
      any_success=true
    else
      max=$((current_volume_count - 1))
    fi
  done

  export MAX_ATTACHED_VOLUMES="$min"
  if [[ $any_success != true ]]; then
    log "WARNING: no deployment succeeded; $MAX_ATTACHED_VOLUMES is the untested lower bound MIN_VOLUME_GUESS, not a verified limit"
  fi
  log "Success!"
  log "Maximum amount of volumes deployed with pod on node with label '$NODE_AFFINITY_KEY_VALUE_PAIR': $MAX_ATTACHED_VOLUMES"
  # Every attempt already cleaned up after itself; drop the trap so the
  # normal exit path does not run cleanup again.
  trap - EXIT
  return "$MAX_ATTACHED_VOLUMES"
}

main "$@"
76 changes: 76 additions & 0 deletions
76
hack/cluster-debugging-scripts/generate_example_manifest.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
Copyright 2018 The Kubernetes Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package main | ||
|
||
import (
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"text/template"
)
|
||
// Manifest is the data passed to the device_slot_test.tmpl template.
type Manifest struct {
	// NodeAffinityKey is the node label key the pod's node affinity matches on.
	NodeAffinityKey string
	// NodeAffinityValue is the node label value the pod's node affinity requires.
	NodeAffinityValue string
	// PodName is the metadata name of the generated pod.
	PodName string
	// Volumes has one element per volume to provision. The template uses only
	// its length and indices, never the element values.
	Volumes []int
}
|
||
func main() { | ||
// Parse Command-Line args & flags | ||
nodeAffinityPtr := flag.String("node-affinity", "", "node affinity for pod in form of 'key:value'") | ||
podNamePtr := flag.String("test-pod-name", "test-pod", "name for pod used in manifest. Default is 'test-pod'") | ||
volumeCountPtr := flag.Int("volume-count", 2, "amount of Volumes to provision") | ||
flag.Parse() | ||
|
||
nodeAffinityKey, nodeAffinityValue, err := parseNodeAffinityFlag(nodeAffinityPtr) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
manifest := Manifest{ | ||
NodeAffinityKey: nodeAffinityKey, | ||
NodeAffinityValue: nodeAffinityValue, | ||
PodName: *podNamePtr, | ||
Volumes: make([]int, *volumeCountPtr), | ||
} | ||
|
||
// Generate manifest to stdout from template file | ||
var tmplFile = "device_slot_test.tmpl" | ||
tmpl, err := template.New(tmplFile).ParseFiles(tmplFile) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
err = tmpl.Execute(os.Stdout, manifest) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
} | ||
|
||
// parseNodeAffinityFlag splits the value of the '--node-affinity' flag into
// its label key and label value.
//
// A nil or empty flag value yields two empty strings and a nil error. Any
// other value must consist of exactly one ':' separating a non-empty key from
// a non-empty value; otherwise an error is returned and both strings are empty.
func parseNodeAffinityFlag(nodeAffinityPtr *string) (string, string, error) {
	if nodeAffinityPtr == nil || *nodeAffinityPtr == "" {
		return "", "", nil
	}

	parts := strings.Split(*nodeAffinityPtr, ":")
	// Reject "key:" and ":value" as well: an empty side would render a
	// manifest with a missing label key or value.
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", fmt.Errorf("flag '--node-affinity' must take the form 'key:value', got %q", *nodeAffinityPtr)
	}
	return parts[0], parts[1], nil
}
Oops, something went wrong.