Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Argo Rollout controller object as a first class citizen #241

Merged
merged 3 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

NAME ?= adobe/k8s-shredder
K8S_SHREDDER_VERSION ?= "dev"
KINDNODE_VERSION ?= "v1.28.0"
KINDNODE_VERSION ?= "v1.30.4"
COMMIT ?= $(shell git rev-parse --short HEAD)
TEST_CLUSTERNAME ?= "k8s-shredder-test-cluster"

Expand Down
2 changes: 1 addition & 1 deletion charts/k8s-shredder/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ maintainers:
email: [email protected]
url: https://adobe.com

version: 0.1.0
version: 0.1.1
appVersion: v0.2.1
3 changes: 3 additions & 0 deletions charts/k8s-shredder/templates/cluster-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ rules:
- apiGroups: [apps, extensions]
resources: [statefulsets, deployments, replicasets]
verbs: [get, list, watch, update, patch]
- apiGroups: [ "argoproj.io" ]
resources: [ rollouts ]
verbs: [ get, list, watch, update, patch ]
{{ end }}
2 changes: 2 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ func discoverConfig() {
viper.SetDefault("RestartedAtAnnotation", "shredder.ethos.adobe.net/restartedAt")
viper.SetDefault("AllowEvictionLabel", "shredder.ethos.adobe.net/allow-eviction")
viper.SetDefault("ToBeDeletedTaint", "ToBeDeletedByClusterAutoscaler")
viper.SetDefault("ArgoRolloutsAPIVersion", "v1alpha1")

err := viper.ReadInConfig()
if err != nil {
Expand Down Expand Up @@ -144,6 +145,7 @@ func parseConfig() {
"RestartedAtAnnotation": cfg.RestartedAtAnnotation,
"AllowEvictionLabel": cfg.AllowEvictionLabel,
"ToBeDeletedTaint": cfg.ToBeDeletedTaint,
"ArgoRolloutsAPIVersion": cfg.ArgoRolloutsAPIVersion,
}).Info("Loaded configuration")
}

Expand Down
122 changes: 83 additions & 39 deletions internal/testing/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
log "github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"os"
"strings"
"testing"
Expand All @@ -47,6 +49,56 @@ var (
}
)

// grabMetrics executes each Prometheus query in shredderMetrics and returns
// the collected results rendered as strings. Query failures are reported via
// t.Errorf and that query's result is skipped; warnings from all queries are
// accumulated and logged in one batch at the end.
func grabMetrics(shredderMetrics []string, t *testing.T) []string {
	results := make([]string, 0, len(shredderMetrics))
	warnings := make([]string, 0)

	for _, shredderMetric := range shredderMetrics {
		result, warning, err := prometheusQuery(shredderMetric)
		if err != nil {
			t.Errorf("Error querying Prometheus: %v\n", err)
			// t.Errorf does not stop execution: skip this entry so we never
			// call String() on a nil model.Value returned with the error.
			continue
		}
		warnings = append(warnings, warning...)
		results = append(results, result.String())
	}

	if len(warnings) > 0 {
		t.Logf("Warnings: %v\n", strings.Join(warnings, "\n"))
	}

	t.Logf("Results: \n%v\n", strings.Join(results, "\n"))

	return results
}

// prometheusQuery runs a single instant query against the test Prometheus
// instance exposed on NodePort 30007 and returns the result value, any
// server-side warnings, and an error. The call is bounded by a 10s client
// context deadline and a 5s server-side query timeout.
func prometheusQuery(query string) (model.Value, v1.Warnings, error) {
	client, err := api.NewClient(api.Config{
		Address: "http://localhost:30007",
	})
	if err != nil {
		// Return the error instead of fmt.Printf + os.Exit(1): exiting here
		// would kill the whole test binary and hide the failure from the
		// calling test, which already handles a non-nil error.
		return nil, nil, fmt.Errorf("creating Prometheus client: %w", err)
	}

	v1api := v1.NewAPI(client)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	return v1api.Query(ctx, query, time.Now(), v1.WithTimeout(5*time.Second))
}

func compareTime(expirationTime time.Time, t *testing.T, ch chan time.Time) {
currentTime := time.Now().UTC()

for !currentTime.After(expirationTime.UTC()) {
t.Logf("Node TTL haven't expired yet: current time(UTC): %s, expire time(UTC): %s", currentTime, expirationTime.UTC())
time.Sleep(10 * time.Second)
currentTime = time.Now().UTC()

}
ch <- currentTime
}

// Validates that k8s-shredder cleans up a parked node after its TTL expires
func TestNodeIsCleanedUp(t *testing.T) {
var err error
Expand Down Expand Up @@ -110,18 +162,7 @@ func TestNodeIsCleanedUp(t *testing.T) {
}
}

// compareTime waits (polling every 10 seconds) until expirationTime has
// passed, then sends the current UTC time on ch. Used by the e2e test to
// block until a parked node's TTL expires.
// NOTE(review): this is the pre-move copy shown by the diff; an identical
// definition appears earlier in the changed file.
func compareTime(expirationTime time.Time, t *testing.T, ch chan time.Time) {
	currentTime := time.Now().UTC()

	for !currentTime.After(expirationTime.UTC()) {
		t.Logf("Node TTL haven't expired yet: current time(UTC): %s, expire time(UTC): %s", currentTime, expirationTime.UTC())
		time.Sleep(10 * time.Second)
		currentTime = time.Now().UTC()

	}
	ch <- currentTime
}

// Validates shredder metrics
func TestShredderMetrics(t *testing.T) {

// Intentionally skipped the gauge metrics as they are going to be wiped out before every eviction loop
Expand All @@ -140,40 +181,43 @@ func TestShredderMetrics(t *testing.T) {
}
}

func grabMetrics(shredderMetrics []string, t *testing.T) []string {
results := make([]string, 0)
warnings := make([]string, 0)
func TestArgoRolloutRestartAt(t *testing.T) {
var err error

for _, shredderMetric := range shredderMetrics {
result, warning, err := prometheusQuery(shredderMetric)
if err != nil {
t.Errorf("Error querying Prometheus: %v\n", err)
}
warnings = append(warnings, warning...)
results = append(results, result.String())
}
appContext, err := utils.NewAppContext(config.Config{
ParkedNodeTTL: 30 * time.Second,
EvictionLoopInterval: 10 * time.Second,
RollingRestartThreshold: 0.1,
UpgradeStatusLabel: "shredder.ethos.adobe.net/upgrade-status",
ExpiresOnLabel: "shredder.ethos.adobe.net/parked-node-expires-on",
NamespacePrefixSkipInitialEviction: "",
RestartedAtAnnotation: "shredder.ethos.adobe.net/restartedAt",
AllowEvictionLabel: "shredder.ethos.adobe.net/allow-eviction",
ToBeDeletedTaint: "ToBeDeletedByClusterAutoscaler",
ArgoRolloutsAPIVersion: "v1alpha1",
}, false)

if len(warnings) > 0 {
t.Logf("Warnings: %v\n", strings.Join(warnings, "\n"))
if err != nil {
log.Fatalf("Failed to setup application context: %s", err)
}

t.Logf("Results: \n%v\n", strings.Join(results, "\n"))

return results
}
gvr := schema.GroupVersionResource{
Group: "argoproj.io",
Version: appContext.Config.ArgoRolloutsAPIVersion,
Resource: "rollouts",
}

func prometheusQuery(query string) (model.Value, v1.Warnings, error) {
rollout, err := appContext.DynamicK8SClient.Resource(gvr).Namespace("ns-team-k8s-shredder-test").Get(appContext.Context, "test-app-argo-rollout", metav1.GetOptions{})
if err != nil {
log.Fatalf("Failed to get the Argo Rollout object: %s", err)
}
_, found, err := unstructured.NestedString(rollout.Object, "spec", "restartAt")

client, err := api.NewClient(api.Config{
Address: "http://localhost:30007",
})
if err != nil {
fmt.Printf("Error creating client: %v\n", err)
os.Exit(1)
log.Fatalf("Failed to get the Argo Rollout spec.restartAt field: %s", err)
}

v1api := v1.NewAPI(client)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
return v1api.Query(ctx, query, time.Now(), v1.WithTimeout(5*time.Second))
if !found {
t.Fatalf("Argo Rollout object does not have the spec.restartAt field set")
}
}
8 changes: 8 additions & 0 deletions internal/testing/local_env_prep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,17 @@ kubectl apply -f "${test_dir}/k8s-shredder.yaml"
echo "KIND: deploying prometheus..."
kubectl apply -f "${test_dir}/prometheus_stuffs.yaml"

echo "KIND: deploying Argo Rollouts CRD..."
kubectl apply -f https://raw.githubusercontent.com/argoproj/argo-rollouts/v1.7.2/manifests/crds/rollout-crd.yaml

echo "KIND: deploying test applications..."
kubectl apply -f "${test_dir}/test_apps.yaml"

# Adjust the correct UID for the test-app-argo-rollout ownerReference
rollout_uid=$(kubectl -n ns-team-k8s-shredder-test get rollout test-app-argo-rollout -ojsonpath='{.metadata.uid}')
cat "${test_dir}/test_apps.yaml" | sed "s/REPLACE_WITH_ROLLOUT_UID/${rollout_uid}/" | kubectl apply -f -


echo "K8S_SHREDDER: waiting for k8s-shredder deployment to become ready!"
retry_count=0
i=1
Expand Down
2 changes: 1 addition & 1 deletion internal/testing/prometheus_stuffs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
spec:
containers:
- name: prometheus
image: prom/prometheus:v2.42.0
image: prom/prometheus:v2.54.1
args:
- "--storage.tsdb.retention.time=1h"
- "--config.file=/etc/prometheus/prometheus.yml"
Expand Down
3 changes: 3 additions & 0 deletions internal/testing/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,7 @@ rules:
- apiGroups: [apps, extensions]
resources: [statefulsets, deployments, replicasets]
verbs: [get, list, watch, update, patch]
- apiGroups: [ "argoproj.io" ]
resources: [ rollouts ]
verbs: [ get, list, watch, update, patch ]

67 changes: 66 additions & 1 deletion internal/testing/test_apps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,69 @@ spec:
minAvailable: 1
selector:
matchLabels:
app: test-app-statefulset
app: test-app-statefulset
#### FLEX ####
# 1. Good citizen Argo Rollout in Flex world
---
apiVersion: apps/v1
kind: ReplicaSet
metadata:
name: test-app-argo-rollout
namespace: ns-team-k8s-shredder-test
ownerReferences:
- apiVersion: argoproj.io/v1alpha1
kind: Rollout
blockOwnerDeletion: true
name: test-app-argo-rollout
uid: REPLACE_WITH_ROLLOUT_UID
spec:
replicas: 2
selector:
matchLabels:
app: test-app-argo-rollout
template:
metadata:
labels:
app: test-app-argo-rollout
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- test-app-argo-rollout
topologyKey: kubernetes.io/hostname
containers:
- name: test-app-argo-rollout
image: aaneci/canary
ports:
- containerPort: 8080
name: web
---
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
name: test-app-argo-rollout
namespace: ns-team-k8s-shredder-test
spec:
replicas: 2
workloadRef:
apiVersion: apps/v1
kind: ReplicaSet
name: test-app-argo-rollout
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: test-app-argo-rollout
namespace: ns-team-k8s-shredder-test
spec:
minAvailable: 10
selector:
matchLabels:
app: test-app-argo-rollout
2 changes: 2 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ type Config struct {
AllowEvictionLabel string
// ToBeDeletedTaint is used for skipping a subset of parked nodes
ToBeDeletedTaint string
// ArgoRolloutsAPIVersion is used for specifying the API version from `argoproj.io` apigroup to be used while handling Argo Rollouts objects
ArgoRolloutsAPIVersion string
}
Loading
Loading