Skip to content

Commit

Permalink
Support Argo Rollout controller object as a first class citizen
Browse files Browse the repository at this point in the history
  • Loading branch information
adriananeci committed Aug 29, 2024
1 parent 1c67bef commit 2ae2dc5
Show file tree
Hide file tree
Showing 13 changed files with 265 additions and 65 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

NAME ?= adobe/k8s-shredder
K8S_SHREDDER_VERSION ?= "dev"
KINDNODE_VERSION ?= "v1.28.0"
KINDNODE_VERSION ?= "v1.30.4"
COMMIT ?= $(shell git rev-parse --short HEAD)
TEST_CLUSTERNAME ?= "k8s-shredder-test-cluster"

Expand Down
4 changes: 2 additions & 2 deletions charts/k8s-shredder/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ maintainers:
email: [email protected]
url: https://adobe.com

version: 0.1.0
appVersion: v0.2.1
version: 0.1.1
appVersion: v0.2.2
3 changes: 3 additions & 0 deletions charts/k8s-shredder/templates/cluster-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ rules:
- apiGroups: [apps, extensions]
resources: [statefulsets, deployments, replicasets]
verbs: [get, list, watch, update, patch]
- apiGroups: [ "argoproj.io" ]
resources: [ rollouts ]
verbs: [ get, list, watch, update, patch ]
{{ end }}
2 changes: 2 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ func discoverConfig() {
viper.SetDefault("RestartedAtAnnotation", "shredder.ethos.adobe.net/restartedAt")
viper.SetDefault("AllowEvictionLabel", "shredder.ethos.adobe.net/allow-eviction")
viper.SetDefault("ToBeDeletedTaint", "ToBeDeletedByClusterAutoscaler")
viper.SetDefault("ArgoRolloutsAPIVersion", "v1alpha1")

err := viper.ReadInConfig()
if err != nil {
Expand Down Expand Up @@ -144,6 +145,7 @@ func parseConfig() {
"RestartedAtAnnotation": cfg.RestartedAtAnnotation,
"AllowEvictionLabel": cfg.AllowEvictionLabel,
"ToBeDeletedTaint": cfg.ToBeDeletedTaint,
"ArgoRolloutsAPIVersion": cfg.ArgoRolloutsAPIVersion,
}).Info("Loaded configuration")
}

Expand Down
122 changes: 83 additions & 39 deletions internal/testing/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
log "github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"os"
"strings"
"testing"
Expand All @@ -47,6 +49,56 @@ var (
}
)

// grabMetrics runs every query in shredderMetrics through prometheusQuery and
// returns the string form of each successful result, in query order.
//
// A failed query is reported through t.Errorf and contributes no entry to the
// returned slice, so a caller can detect failures by comparing lengths. The
// warnings returned by all queries are collected and logged once; the
// collected results are always logged before returning.
func grabMetrics(shredderMetrics []string, t *testing.T) []string {
	results := make([]string, 0, len(shredderMetrics))
	warnings := make([]string, 0)

	for _, shredderMetric := range shredderMetrics {
		result, warning, err := prometheusQuery(shredderMetric)
		// Keep the warnings even when the query failed; they may explain why.
		warnings = append(warnings, warning...)
		if err != nil {
			t.Errorf("Error querying Prometheus: %v\n", err)
			// The result of a failed call must not be used: calling String()
			// on a nil result would panic and abort the whole test binary.
			continue
		}
		results = append(results, result.String())
	}

	if len(warnings) > 0 {
		t.Logf("Warnings: %v\n", strings.Join(warnings, "\n"))
	}

	t.Logf("Results: \n%v\n", strings.Join(results, "\n"))

	return results
}

// prometheusQuery evaluates query as an instant query (at the current time)
// against the Prometheus server at http://localhost:30007.
//
// It returns whatever the Prometheus API client's Query call returns: the
// value, any warnings, and an error. The call is bounded by a 10 second
// context deadline and a 5 second query timeout option. If the API client
// cannot be created, the error is printed and the process exits with status 1.
func prometheusQuery(query string) (model.Value, v1.Warnings, error) {
	cfg := api.Config{Address: "http://localhost:30007"}

	promClient, clientErr := api.NewClient(cfg)
	if clientErr != nil {
		fmt.Printf("Error creating client: %v\n", clientErr)
		os.Exit(1)
	}

	queryCtx, stop := context.WithTimeout(context.Background(), 10*time.Second)
	defer stop()

	return v1.NewAPI(promClient).Query(queryCtx, query, time.Now(), v1.WithTimeout(5*time.Second))
}

// compareTime waits until the current UTC time is strictly after
// expirationTime and then sends that observed time on ch.
//
// While the expiration has not been reached it logs the current and the
// expiration time through t and checks again after sleeping 10 seconds.
func compareTime(expirationTime time.Time, t *testing.T, ch chan time.Time) {
	deadline := expirationTime.UTC()

	for {
		now := time.Now().UTC()
		if now.After(deadline) {
			ch <- now
			return
		}

		t.Logf("Node TTL haven't expired yet: current time(UTC): %s, expire time(UTC): %s", now, deadline)
		time.Sleep(10 * time.Second)
	}
}

// Validates that k8s-shredder cleans up a parked node after its TTL expires
func TestNodeIsCleanedUp(t *testing.T) {
var err error
Expand Down Expand Up @@ -110,18 +162,7 @@ func TestNodeIsCleanedUp(t *testing.T) {
}
}

// compareTime polls the clock until the current UTC time is strictly after
// expirationTime, then delivers that time on ch.
//
// Each unsuccessful check logs both timestamps through t and is followed by
// a 10 second sleep before the clock is read again.
func compareTime(expirationTime time.Time, t *testing.T, ch chan time.Time) {
	expiresAt := expirationTime.UTC()

	now := time.Now().UTC()
	for ; !now.After(expiresAt); now = time.Now().UTC() {
		t.Logf("Node TTL haven't expired yet: current time(UTC): %s, expire time(UTC): %s", now, expiresAt)
		time.Sleep(10 * time.Second)
	}

	ch <- now
}

// Validates shredder metrics
func TestShredderMetrics(t *testing.T) {

// The gauge metrics are intentionally skipped because they are wiped out before every eviction loop
Expand All @@ -140,40 +181,43 @@ func TestShredderMetrics(t *testing.T) {
}
}

func grabMetrics(shredderMetrics []string, t *testing.T) []string {
results := make([]string, 0)
warnings := make([]string, 0)
func TestArgoRolloutRestartAt(t *testing.T) {
var err error

for _, shredderMetric := range shredderMetrics {
result, warning, err := prometheusQuery(shredderMetric)
if err != nil {
t.Errorf("Error querying Prometheus: %v\n", err)
}
warnings = append(warnings, warning...)
results = append(results, result.String())
}
appContext, err := utils.NewAppContext(config.Config{
ParkedNodeTTL: 30 * time.Second,
EvictionLoopInterval: 10 * time.Second,
RollingRestartThreshold: 0.1,
UpgradeStatusLabel: "shredder.ethos.adobe.net/upgrade-status",
ExpiresOnLabel: "shredder.ethos.adobe.net/parked-node-expires-on",
NamespacePrefixSkipInitialEviction: "",
RestartedAtAnnotation: "shredder.ethos.adobe.net/restartedAt",
AllowEvictionLabel: "shredder.ethos.adobe.net/allow-eviction",
ToBeDeletedTaint: "ToBeDeletedByClusterAutoscaler",
ArgoRolloutsAPIVersion: "v1alpha1",
}, false)

if len(warnings) > 0 {
t.Logf("Warnings: %v\n", strings.Join(warnings, "\n"))
if err != nil {
log.Fatalf("Failed to setup application context: %s", err)
}

t.Logf("Results: \n%v\n", strings.Join(results, "\n"))

return results
}
gvr := schema.GroupVersionResource{
Group: "argoproj.io",
Version: appContext.Config.ArgoRolloutsAPIVersion,
Resource: "rollouts",
}

func prometheusQuery(query string) (model.Value, v1.Warnings, error) {
rollout, err := appContext.DynamicK8SClient.Resource(gvr).Namespace("ns-team-k8s-shredder-test").Get(appContext.Context, "test-app-argo-rollout", metav1.GetOptions{})
if err != nil {
log.Fatalf("Failed to get the Argo Rollout object: %s", err)
}
_, found, err := unstructured.NestedString(rollout.Object, "spec", "restartAt")

client, err := api.NewClient(api.Config{
Address: "http://localhost:30007",
})
if err != nil {
fmt.Printf("Error creating client: %v\n", err)
os.Exit(1)
log.Fatalf("Failed to get the Argo Rollout spec.restartAt field: %s", err)
}

v1api := v1.NewAPI(client)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
return v1api.Query(ctx, query, time.Now(), v1.WithTimeout(5*time.Second))
if !found {
t.Fatalf("Argo Rollout object does not have the spec.restartAt field set")
}
}
8 changes: 8 additions & 0 deletions internal/testing/local_env_prep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,17 @@ kubectl apply -f "${test_dir}/k8s-shredder.yaml"
echo "KIND: deploying prometheus..."
kubectl apply -f "${test_dir}/prometheus_stuffs.yaml"

echo "KIND: deploying Argo Rollouts CRD..."
kubectl apply -f https://raw.githubusercontent.com/argoproj/argo-rollouts/v1.7.2/manifests/crds/rollout-crd.yaml

echo "KIND: deploying test applications..."
kubectl apply -f "${test_dir}/test_apps.yaml"

# Re-apply the test applications with the REPLACE_WITH_ROLLOUT_UID placeholder
# swapped for the live UID of the test-app-argo-rollout Rollout, so the
# ownerReference points at the real object.
rollout_uid="$(kubectl -n ns-team-k8s-shredder-test get rollout test-app-argo-rollout -o jsonpath='{.metadata.uid}')"
sed "s/REPLACE_WITH_ROLLOUT_UID/${rollout_uid}/" "${test_dir}/test_apps.yaml" | kubectl apply -f -


echo "K8S_SHREDDER: waiting for k8s-shredder deployment to become ready!"
retry_count=0
i=1
Expand Down
2 changes: 1 addition & 1 deletion internal/testing/prometheus_stuffs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
spec:
containers:
- name: prometheus
image: prom/prometheus:v2.42.0
image: prom/prometheus:v2.54.1
args:
- "--storage.tsdb.retention.time=1h"
- "--config.file=/etc/prometheus/prometheus.yml"
Expand Down
3 changes: 3 additions & 0 deletions internal/testing/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,7 @@ rules:
- apiGroups: [apps, extensions]
resources: [statefulsets, deployments, replicasets]
verbs: [get, list, watch, update, patch]
- apiGroups: [ "argoproj.io" ]
resources: [ rollouts ]
verbs: [ get, list, watch, update, patch ]

56 changes: 55 additions & 1 deletion internal/testing/test_apps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,58 @@ spec:
minAvailable: 1
selector:
matchLabels:
app: test-app-statefulset
app: test-app-statefulset
#### FLEX ####
# 1. Good citizen Argo Rollout in Flex world
---
apiVersion: apps/v1
kind: ReplicaSet
metadata:
name: test-app-argo-rollout
namespace: ns-team-k8s-shredder-test
ownerReferences:
- apiVersion: argoproj.io/v1alpha1
kind: Rollout
blockOwnerDeletion: true
name: test-app-argo-rollout
uid: REPLACE_WITH_ROLLOUT_UID
spec:
replicas: 2
selector:
matchLabels:
app: test-app-argo-rollout
template:
metadata:
labels:
app: test-app-argo-rollout
spec:
terminationGracePeriodSeconds: 10
containers:
- name: test-app-argo-rollout
image: aaneci/canary
ports:
- containerPort: 8080
name: web
---
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
name: test-app-argo-rollout
namespace: ns-team-k8s-shredder-test
spec:
replicas: 2
workloadRef:
apiVersion: apps/v1
kind: ReplicaSet
name: test-app-argo-rollout
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: test-app-argo-rollout
namespace: ns-team-k8s-shredder-test
spec:
minAvailable: 10
selector:
matchLabels:
app: test-app-argo-rollout
2 changes: 2 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ type Config struct {
AllowEvictionLabel string
// ToBeDeletedTaint is used for skipping a subset of parked nodes
ToBeDeletedTaint string
// ArgoRolloutsAPIVersion is the version of the `argoproj.io` API group to use when handling Argo Rollouts objects
ArgoRolloutsAPIVersion string
}
Loading

0 comments on commit 2ae2dc5

Please sign in to comment.