change mtbf definition, support mean time in hours and minutes

asobti · Jan 2, 2020 · 175ec88 · 175ec88
1 parent 90c9832
commit 175ec88
Show file tree

Hide file tree

Showing 17 changed files with 116 additions and 109 deletions.
diff --git a/README.md b/README.md
@@ -21,8 +21,8 @@ kube-monkey works on an opt-in model and will only schedule terminations for Kub
 Opt-in is done by setting the following labels on a k8s app:
 
 **`kube-monkey/enabled`**: Set to **`"enabled"`** to opt-in to kube-monkey  
-**`kube-monkey/mtbf`**: Mean time between failure (in days). For example, if set to **`"3"`**, the k8s app can expect to have a Pod
-killed approximately every third weekday.  
+**`kube-monkey/mtbf`**: Mean time between failures, expressed as a duration string (parsed with Go's `time.ParseDuration`). Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". For example, if set to **`"3h"`**, the k8s app can expect to have a Pod
+killed approximately every 3 hours.  
 **`kube-monkey/identifier`**: A unique identifier for the k8s apps. This is used to identify the pods
 that belong to a k8s app as Pods inherit labels from their k8s app. So, if kube-monkey detects that app `foo` has enrolled to be a victim, kube-monkey will look for all pods that have the label `kube-monkey/identifier: foo` to determine which pods are candidates for killing. Recommendation is to set this value to be the same as the app's name.  
 **`kube-monkey/kill-mode`**: Default behavior is for kube-monkey to kill only ONE pod of your app. You can override this behavior by setting the value to:
@@ -52,7 +52,7 @@ spec:
       labels:
         kube-monkey/enabled: enabled
         kube-monkey/identifier: monkey-victim
-        kube-monkey/mtbf: '2'
+        kube-monkey/mtbf: '2h'
         kube-monkey/kill-mode: "fixed"
         kube-monkey/kill-value: '1'
 [... omitted ...]
@@ -70,7 +70,7 @@ metadata:
   labels:
     kube-monkey/enabled: enabled
     kube-monkey/identifier: monkey-victim
-    kube-monkey/mtbf: '2'
+    kube-monkey/mtbf: '2h'
     kube-monkey/kill-mode: "fixed"
     kube-monkey/kill-value: '1'
 spec:

diff --git a/calendar/calendar.go b/calendar/calendar.go
@@ -65,3 +65,35 @@ func RandomTimeInRange(startHour int, endHour int, loc *time.Location) time.Time
 	rangeStart := time.Date(year, month, date, startHour, 0, 0, 0, loc)
 	return rangeStart.Add(offsetDuration)
 }
+
+// CustzRandomTimeInRange picks a random kill time for a victim whose mean
+// time between failures is mtbf, a string accepted by time.ParseDuration
+// (for example "3h" or "90m").
+//
+// A random whole-second offset is drawn from [0, 2*mtbf), so the average
+// offset is roughly mtbf. If now+offset falls before today's endHour it is
+// returned unchanged. Otherwise the part of the offset that lies past
+// today's endHour is carried over to following days: each day absorbs
+// (endHour-startHour) hours of it, and the remainder is added to that day's
+// startHour.
+//
+// When mtbf cannot be parsed, when twice mtbf is shorter than one second
+// (which includes zero and negative values), or when a carry-over is needed
+// but endHour is not after startHour, the problem is logged and a time about
+// ten years from now is returned so the victim is effectively never killed.
+//
+// NOTE(review): if the current time is earlier than startHour, a small offset
+// is returned as-is and can therefore land before startHour — confirm whether
+// callers rely on the result being inside the startHour-endHour window.
+func CustzRandomTimeInRange(mtbf string, startHour, endHour int, loc *time.Location) time.Time {
+	// never yields the sentinel used for unusable input: far enough in the
+	// future that the kill is not expected to fire.
+	never := func() time.Time {
+		return time.Now().Add(time.Duration(24*365*10) * time.Hour)
+	}
+
+	meanDuration, err := time.ParseDuration(mtbf)
+	if err != nil {
+		glog.Errorf("error parsing customized mtbf %s: %v", mtbf, err)
+		return never()
+	}
+
+	// The sampling window is twice the mean so that the average offset is the mean.
+	windowSeconds := int64((meanDuration * 2) / time.Second)
+	if windowSeconds <= 0 {
+		// rand.Int63n panics for n <= 0, so reject sub-second, zero and
+		// negative values instead of crashing the scheduler.
+		glog.Errorf("unusable customized mtbf %s: twice the value must be at least one second", mtbf)
+		return never()
+	}
+
+	// Draw the random offset from the current time.
+	now := time.Now().In(loc)
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	randSecondOffset := r.Int63n(windowSeconds)
+	randCalTime := now.Add(time.Duration(randSecondOffset) * time.Second)
+
+	year, month, date := now.Date()
+	todayEndTime := time.Date(year, month, date, endHour, 0, 0, 0, loc)
+	if randCalTime.Before(todayEndTime) { // offset lands before today's endHour
+		glog.V(1).Infof("CustzRandomTimeInRange calculate time %s", randCalTime)
+		return randCalTime
+	}
+
+	// The offset runs past today's endHour: spread the remainder over the
+	// following days' startHour-endHour windows.
+	dailyWindowSeconds := int64(endHour-startHour) * 60 * 60
+	if dailyWindowSeconds <= 0 {
+		// Guards the division and modulo below against a zero or negative window.
+		glog.Errorf("invalid kill window: start hour %d is not before end hour %d", startHour, endHour)
+		return never()
+	}
+
+	todayStartTime := time.Date(year, month, date, startHour, 0, 0, 0, loc)
+	leftOffset := randSecondOffset - int64(todayEndTime.Sub(now)/time.Second)
+	offsetDay := leftOffset/dailyWindowSeconds + 1
+	modOffsetSecond := leftOffset % dailyWindowSeconds
+	killTime := todayStartTime.Add(time.Duration(offsetDay*24) * time.Hour).Add(time.Duration(modOffsetSecond) * time.Second)
+	glog.V(1).Infof("CustzRandomTimeInRange calculate time %s", killTime)
+	return killTime
+}
diff --git a/calendar/calendar_test.go b/calendar/calendar_test.go
@@ -86,3 +86,23 @@ func TestRandomTimeInRange(t *testing.T) {
 
 	assert.Condition(t, scheduledTime)
 }
+
+// TestCustzRandomTimeInRange freezes time.Now at 2018-04-16 12:00 UTC and
+// checks that the kill time computed for a "5h" mtbf with startHour 10 and
+// endHour 12 has an hour between 10 and 12 inclusive.
+func TestCustzRandomTimeInRange(t *testing.T) {
+	frozenNow := time.Date(2018, 4, 16, 12, 0, 0, 0, time.UTC)
+	monkey.Patch(time.Now, func() time.Time { return frozenNow })
+	defer monkey.Unpatch(time.Now)
+
+	killTime := CustzRandomTimeInRange("5h", 10, 12, time.UTC)
+
+	withinWindow := func() bool {
+		hour := killTime.Hour()
+		return hour >= 10 && hour <= 12
+	}
+
+	assert.Condition(t, withinWindow)
+}
diff --git a/chaos/chaosmock.go b/chaos/chaosmock.go
@@ -72,7 +72,7 @@ func (vm *victimMock) IsWhitelisted() bool {
 }
 
 func newVictimMock() *victimMock {
-	v := victims.New(KIND, NAME, NAMESPACE, IDENTIFIER, 1)
+	v := victims.New(KIND, NAME, NAMESPACE, IDENTIFIER, "1h")
 	return &victimMock{
 		VictimBase: *v,
 	}

diff --git a/schedule/schedule.go b/schedule/schedule.go
@@ -72,25 +72,24 @@ func New() (*Schedule, error) {
 	}
 
 	for _, victim := range victims {
-		killtime := CalculateKillTime()
+		killtime := CalculateKillTime(victim.Mtbf())
+
+		schedule.Add(chaos.New(killtime, victim))
 
-		if ShouldScheduleChaos(victim.Mtbf()) {
-			schedule.Add(chaos.New(killtime, victim))
-		}
 	}
 
 	return schedule, nil
 }
 
-func CalculateKillTime() time.Time {
+func CalculateKillTime(mtbf string) time.Time {
 	loc := config.Timezone()
 	if config.DebugEnabled() && config.DebugScheduleImmediateKill() {
 		r := rand.New(rand.NewSource(time.Now().UnixNano()))
 		// calculate a second-offset in the next minute
 		secOffset := r.Intn(60)
 		return time.Now().In(loc).Add(time.Duration(secOffset) * time.Second)
 	}
-	return calendar.RandomTimeInRange(config.StartHour(), config.EndHour(), loc)
+	return calendar.CustzRandomTimeInRange(mtbf, config.StartHour(), config.EndHour(), loc)
 }
 
 func ShouldScheduleChaos(mtbf int) bool {

diff --git a/schedule/schedule_test.go b/schedule/schedule_test.go
@@ -65,7 +65,7 @@ func TestStringWithEntries(t *testing.T) {
 
 func TestCalculateKillTimeRandom(t *testing.T) {
 	config.SetDefaults()
-	killtime := CalculateKillTime()
+	killtime := CalculateKillTime("1h")
 
 	scheduledTime := func() (success bool) {
 		if killtime.Hour() >= config.StartHour() && killtime.Hour() <= config.EndHour() {
@@ -83,7 +83,7 @@ func TestCalculateKillTimeNow(t *testing.T) {
 	config.SetDefaults()
 	viper.SetDefault(param.DebugEnabled, true)
 	viper.SetDefault(param.DebugScheduleImmediateKill, true)
-	killtime := CalculateKillTime()
+	killtime := CalculateKillTime("1h")
 
 	assert.Equal(t, killtime.Location(), config.Timezone())
 	assert.WithinDuration(t, killtime, time.Now(), time.Second*time.Duration(60))

diff --git a/victims/factory/daemonsets/daemonsets.go b/victims/factory/daemonsets/daemonsets.go
@@ -2,7 +2,7 @@ package daemonsets
 
 import (
 	"fmt"
-	"strconv"
+	"time"
 
 	"github.com/asobti/kube-monkey/config"
 	"github.com/asobti/kube-monkey/victims"
@@ -44,20 +44,16 @@ func identifier(kubekind *v1.DaemonSet) (string, error) {
 
 // Read the mean-time-between-failures value defined by the DaemonSet
 // in the label defined by config.MtbfLabelKey
-func meanTimeBetweenFailures(kubekind *v1.DaemonSet) (int, error) {
+func meanTimeBetweenFailures(kubekind *v1.DaemonSet) (string, error) {
 	mtbf, ok := kubekind.Labels[config.MtbfLabelKey]
 	if !ok {
-		return -1, fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey)
+		return "", fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey)
 	}
 
-	mtbfInt, err := strconv.Atoi(mtbf)
+	_, err := time.ParseDuration(mtbf)
 	if err != nil {
-		return -1, err
+		return "", fmt.Errorf("error parsing mtbf %s: %v", mtbf, err)
 	}
 
-	if !(mtbfInt > 0) {
-		return -1, fmt.Errorf("Invalid value for label %s: %d", config.MtbfLabelKey, mtbfInt)
-	}
-
-	return mtbfInt, nil
+	return mtbf, nil
 }
diff --git a/victims/factory/daemonsets/daemonsets_test.go b/victims/factory/daemonsets/daemonsets_test.go
@@ -32,7 +32,7 @@ func TestNew(t *testing.T) {
 		NAME,
 		map[string]string{
 			config.IdentLabelKey: IDENTIFIER,
-			config.MtbfLabelKey:  "1",
+			config.MtbfLabelKey:  "1h",
 		},
 	)
 	ds, err := New(&v1ds)
@@ -42,14 +42,14 @@ func TestNew(t *testing.T) {
 	assert.Equal(t, NAME, ds.Name())
 	assert.Equal(t, NAMESPACE, ds.Namespace())
 	assert.Equal(t, IDENTIFIER, ds.Identifier())
-	assert.Equal(t, 1, ds.Mtbf())
+	assert.Equal(t, "1h", ds.Mtbf())
 }
 
 func TestInvalidIdentifier(t *testing.T) {
 	v1ds := newDaemonSet(
 		NAME,
 		map[string]string{
-			config.MtbfLabelKey: "1",
+			config.MtbfLabelKey: "1h",
 		},
 	)
 	_, err := New(&v1ds)
@@ -77,16 +77,6 @@ func TestInvalidMtbf(t *testing.T) {
 	)
 	_, err = New(&v1ds)
 
-	assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a Int type")
+	assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a time.Duration type")
 
-	v1ds = newDaemonSet(
-		NAME,
-		map[string]string{
-			config.IdentLabelKey: IDENTIFIER,
-			config.MtbfLabelKey:  "0",
-		},
-	)
-	_, err = New(&v1ds)
-
-	assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label is lower than 1")
 }
diff --git a/victims/factory/daemonsets/eligible_daemonsets_test.go b/victims/factory/daemonsets/eligible_daemonsets_test.go
@@ -14,7 +14,7 @@ func TestEligibleDaemonSets(t *testing.T) {
 		NAME,
 		map[string]string{
 			"kube-monkey/identifier": "1",
-			"kube-monkey/mtbf":       "1",
+			"kube-monkey/mtbf":       "1h",
 		},
 	)
 
@@ -29,7 +29,7 @@ func TestIsEnrolled(t *testing.T) {
 		NAME,
 		map[string]string{
 			config.IdentLabelKey:   "1",
-			config.MtbfLabelKey:    "1",
+			config.MtbfLabelKey:    "1h",
 			config.EnabledLabelKey: config.EnabledLabelValue,
 		},
 	)
@@ -48,7 +48,7 @@ func TestIsNotEnrolled(t *testing.T) {
 		NAME,
 		map[string]string{
 			config.IdentLabelKey:   "1",
-			config.MtbfLabelKey:    "1",
+			config.MtbfLabelKey:    "1h",
 			config.EnabledLabelKey: "x",
 		},
 	)
@@ -65,7 +65,7 @@ func TestIsNotEnrolled(t *testing.T) {
 func TestKillType(t *testing.T) {
 
 	ident := "1"
-	mtbf := "1"
+	mtbf := "1h"
 	killMode := "kill-mode"
 
 	v1ds := newDaemonSet(
@@ -103,7 +103,7 @@ func TestKillType(t *testing.T) {
 func TestKillValue(t *testing.T) {
 
 	ident := "1"
-	mtbf := "1"
+	mtbf := "1h"
 	killValue := "0"
 
 	v1ds := newDaemonSet(

diff --git a/victims/factory/deployments/deployments.go b/victims/factory/deployments/deployments.go
@@ -2,10 +2,9 @@ package deployments
 
 import (
 	"fmt"
-	"strconv"
-
 	"github.com/asobti/kube-monkey/config"
 	"github.com/asobti/kube-monkey/victims"
+	"time"
 
 	"k8s.io/api/apps/v1"
 )
@@ -44,20 +43,16 @@ func identifier(kubekind *v1.Deployment) (string, error) {
 
 // Read the mean-time-between-failures value defined by the Deployment
 // in the label defined by config.MtbfLabelKey
-func meanTimeBetweenFailures(kubekind *v1.Deployment) (int, error) {
+func meanTimeBetweenFailures(kubekind *v1.Deployment) (string, error) {
 	mtbf, ok := kubekind.Labels[config.MtbfLabelKey]
 	if !ok {
-		return -1, fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey)
+		return "", fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey)
 	}
 
-	mtbfInt, err := strconv.Atoi(mtbf)
+	_, err := time.ParseDuration(mtbf)
 	if err != nil {
-		return -1, err
-	}
-
-	if !(mtbfInt > 0) {
-		return -1, fmt.Errorf("Invalid value for label %s: %d", config.MtbfLabelKey, mtbfInt)
+		return "", fmt.Errorf("error parsing mtbf %s: %v", mtbf, err)
 	}
 
-	return mtbfInt, nil
+	return mtbf, nil
 }
diff --git a/victims/factory/deployments/deployments_test.go b/victims/factory/deployments/deployments_test.go
@@ -32,7 +32,7 @@ func TestNew(t *testing.T) {
 		NAME,
 		map[string]string{
 			config.IdentLabelKey: IDENTIFIER,
-			config.MtbfLabelKey:  "1",
+			config.MtbfLabelKey:  "1h",
 		},
 	)
 	depl, err := New(&v1depl)
@@ -42,14 +42,14 @@ func TestNew(t *testing.T) {
 	assert.Equal(t, NAME, depl.Name())
 	assert.Equal(t, NAMESPACE, depl.Namespace())
 	assert.Equal(t, IDENTIFIER, depl.Identifier())
-	assert.Equal(t, 1, depl.Mtbf())
+	assert.Equal(t, "1h", depl.Mtbf())
 }
 
 func TestInvalidIdentifier(t *testing.T) {
 	v1depl := newDeployment(
 		NAME,
 		map[string]string{
-			config.MtbfLabelKey: "1",
+			config.MtbfLabelKey: "1h",
 		},
 	)
 	_, err := New(&v1depl)
@@ -77,16 +77,5 @@ func TestInvalidMtbf(t *testing.T) {
 	)
 	_, err = New(&v1depl)
 
-	assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a Int type")
-
-	v1depl = newDeployment(
-		NAME,
-		map[string]string{
-			config.IdentLabelKey: IDENTIFIER,
-			config.MtbfLabelKey:  "0",
-		},
-	)
-	_, err = New(&v1depl)
-
-	assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label is lower than 1")
+	assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a time.Duration type")
 }
diff --git a/victims/factory/deployments/eligible_deployments_test.go b/victims/factory/deployments/eligible_deployments_test.go
@@ -14,7 +14,7 @@ func TestEligibleDeployments(t *testing.T) {
 		NAME,
 		map[string]string{
 			"kube-monkey/identifier": "1",
-			"kube-monkey/mtbf":       "1",
+			"kube-monkey/mtbf":       "1h",
 		},
 	)
 
@@ -29,7 +29,7 @@ func TestIsEnrolled(t *testing.T) {
 		NAME,
 		map[string]string{
 			config.IdentLabelKey:   "1",
-			config.MtbfLabelKey:    "1",
+			config.MtbfLabelKey:    "1h",
 			config.EnabledLabelKey: config.EnabledLabelValue,
 		},
 	)
@@ -48,7 +48,7 @@ func TestIsNotEnrolled(t *testing.T) {
 		NAME,
 		map[string]string{
 			config.IdentLabelKey:   "1",
-			config.MtbfLabelKey:    "1",
+			config.MtbfLabelKey:    "1h",
 			config.EnabledLabelKey: "x",
 		},
 	)
@@ -65,7 +65,7 @@ func TestIsNotEnrolled(t *testing.T) {
 func TestKillType(t *testing.T) {
 
 	ident := "1"
-	mtbf := "1"
+	mtbf := "1h"
 	killMode := "kill-mode"
 
 	v1depl := newDeployment(
@@ -103,7 +103,7 @@ func TestKillType(t *testing.T) {
 func TestKillValue(t *testing.T) {
 
 	ident := "1"
-	mtbf := "1"
+	mtbf := "1h"
 	killValue := "0"
 
 	v1depl := newDeployment(