From cc28001ccb53c51f063c4c5e1d1662a3f0b22498 Mon Sep 17 00:00:00 2001 From: Nikolay Martyanov Date: Tue, 5 Nov 2024 21:58:29 +0100 Subject: [PATCH] pillar/watcher: Add unit tests for goroutine leak detection. Introduce a suite of tests to verify the functionality of the goroutine leak detection system, simulating various scenarios to ensure robust leak identification. Growth scenarios: Test scenarios for initial goroutine growth with stabilization, sudden spikes, and decreases after spikes to validate that these typical patterns do not trigger false positives. Leak scenario: Include tests where the goroutine count gradually increases, simulating a leak. The test verifies that the detector correctly identifies this pattern as a leak. Monitoring tests: Implement tests for the `goroutinesMonitor` function to check detection behavior at regular intervals. Edge cases for the helper function: Add tests to handle empty data inputs, minimal data length, and cases where the moving average window exceeds data length, ensuring resilience against edge cases. Signed-off-by: Nikolay Martyanov --- pkg/pillar/cmd/watcher/watcher_test.go | 340 +++++++++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 pkg/pillar/cmd/watcher/watcher_test.go diff --git a/pkg/pillar/cmd/watcher/watcher_test.go b/pkg/pillar/cmd/watcher/watcher_test.go new file mode 100644 index 0000000000..e37dbf29e4 --- /dev/null +++ b/pkg/pillar/cmd/watcher/watcher_test.go @@ -0,0 +1,340 @@ +// Copyright (c) 2024 Zededa, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package watcher + +import ( + "github.com/lf-edge/eve/pkg/pillar/agentlog" + "io" + "math" + "os" + "reflect" + "strings" + "sync" + "testing" + "time" +) + +// Scenario 1: Initial growth that slows down and then stabilizes +func emulateSystemStart(startDuration, stabilizationDuration int) []int { + totalDuration := startDuration + stabilizationDuration + data := make([]int, startDuration) + baseGoroutines := 0 + for i := 0; i < startDuration; i++ { + // Simulate fast growth that slows down + growth := int(500 * (1 - math.Exp(-float64(i)/10))) // Exponential decay + data[i] = baseGoroutines + growth + } + // Stabilize after growth + stableGoroutines := data[len(data)-1] + for i := startDuration; i < totalDuration; i++ { + data = append(data, stableGoroutines) + } + return data +} + +// Scenario 2: Stabilization, then a process creates a lot of goroutines quickly, which then stabilizes +func emulateSpikeAfterSystemStart(startDuration, stabilizationDuration, spikeDuration int) []int { + data := emulateSystemStart(startDuration, stabilizationDuration) + totalStartDuration := len(data) + baseGoroutines := data[len(data)-1] + for i := totalStartDuration; i < totalStartDuration+spikeDuration; i++ { + growth := int(100 * (1 - math.Exp(-float64(i-startDuration)/5))) // Quick spike + data = append(data, baseGoroutines+growth) + } + // Stabilize after spike + stableGoroutines := data[len(data)-1] + for i := totalStartDuration + spikeDuration; i < totalStartDuration+spikeDuration+stabilizationDuration; i++ { + data = append(data, stableGoroutines) + } + return data +} + +// Scenario 3: After the spike, goroutine count decreases +func emulateDecreaseAfterSpike(decreaseDuration, spikeDuration, stabilizationDuration, startDuration int) []int { + data := emulateSpikeAfterSystemStart(startDuration, stabilizationDuration, spikeDuration) + decreaseStart := len(data) + baseGoroutines := data[decreaseStart-1] + for i := 0; i < decreaseDuration; i++ { + decrease := int(float64(baseGoroutines) * (1 - float64(i)/float64(decreaseDuration))) + data = append(data, decrease) + } + return data +} + +// Scenario 4: After the spike, goroutine count starts to slowly increase over time +func emulateLeakAfterSpike(leakDuration, stabilizationDuration, spikeDuration, startDuration int) []int { + data := emulateSpikeAfterSystemStart(startDuration, stabilizationDuration, spikeDuration) + increaseStart := len(data) + baseGoroutines := data[increaseStart-1] + for i := increaseStart; i < increaseStart+leakDuration; i++ { + // Slow linear increase + growth := baseGoroutines + (i-increaseStart)*5 + data = append(data, growth) + } + return data +} + +func TestMain(m *testing.M) { + logger, log = agentlog.Init("watcher") + os.Exit(m.Run()) +} + +// Computes moving average correctly for valid data and window size +func TestMovingAverageValidData(t *testing.T) { + data := []int{1, 2, 3, 4, 5} + windowSize := 3 + expected := []float64{2.0, 3.0, 4.0} + + result := movingAverage(data, windowSize) + + if !reflect.DeepEqual(result, expected) { + t.Errorf("Expected %v, got %v", expected, result) + } +} + +// Handles window size equal to data length by returning a single average +func TestMovingAverageWindowSizeEqualDataLength(t *testing.T) { + data := []int{1, 2, 3, 4, 5} + windowSize := 5 + expected := []float64{3.0} + + result := movingAverage(data, windowSize) + + if !reflect.DeepEqual(result, expected) { + t.Errorf("Expected %v, got %v", expected, result) + } +} + +// Handles empty data array gracefully +func TestMovingAverageEmptyData(t *testing.T) { + data := []int{} + windowSize := 3 + var expected []float64 + expected = nil + + result := movingAverage(data, windowSize) + + if !reflect.DeepEqual(result, expected) { + t.Errorf("Expected %v (type %T), got %v (type %T)", expected, expected, result, result) + } +} + +// Manages window size of zero by defaulting to 1 +func TestMovingAverageWindowSizeZero(t *testing.T) { + data := []int{1, 2, 3, 4, 5} + windowSize := 0 + expected := []float64{1.0, 2.0, 3.0, 4.0, 5.0} + + result := movingAverage(data, windowSize) + + if !reflect.DeepEqual(result, expected) { + t.Errorf("Expected %v, got %v", expected, result) + } +} + +// Deals with window size larger than data length by defaulting to data length +func TestMovingAverageWindowSizeLargerThanDataLength(t *testing.T) { + data := []int{1, 2, 3} + windowSize := 5 + expected := []float64{2.0} + + result := movingAverage(data, windowSize) + + if !reflect.DeepEqual(result, expected) { + t.Errorf("Expected %v, got %v", expected, result) + } +} + +func TestGoroutineLeakDetectorWithSystemStart(t *testing.T) { + stats := emulateSystemStart(5, 5) + detected, _ := detectGoroutineLeaks(stats) + if detected { + t.Errorf("Expected no goroutine leak, but detected one") + } +} + +func TestGoroutineLeakDetectorWithLegitSpike(t *testing.T) { + stats := emulateSpikeAfterSystemStart(5, 5, 20) + detected, _ := detectGoroutineLeaks(stats) + if detected { + t.Errorf("Expected no goroutine leak, but detected one") + } +} + +func TestGoroutineLeakDetectorWithDecreaseAfterSpike(t *testing.T) { + stats := emulateDecreaseAfterSpike(10, 20, 5, 5) + detected, _ := detectGoroutineLeaks(stats) + if detected { + t.Errorf("Expected no goroutine leak, but detected one") + } +} + +func TestGoroutineLeakDetectorWithLeakAfterSpike(t *testing.T) { + stats := emulateLeakAfterSpike(100, 5, 20, 5) + detected, _ := detectGoroutineLeaks(stats) + if !detected { + t.Errorf("Expected goroutine leak to be detected, but it was not") + } +} + +func TestGoroutineLeakDetectorWithLeakEachStep(t *testing.T) { + startDuration := 5 + stabilizationDuration := 5 + spikeDuration := 20 + leakDuration := 100 + leakMayBeDetectedAfter := startDuration + stabilizationDuration + spikeDuration + stabilizationDuration + possibleFalsePositives := 60 + leakMustBeDetectedAfter := leakMayBeDetectedAfter + possibleFalsePositives + stats := emulateLeakAfterSpike(leakDuration, stabilizationDuration, spikeDuration, startDuration) + // Now check the behavior of detector on each new data point + for i := 0; i < len(stats); i++ { + detected, _ := detectGoroutineLeaks(stats[:i]) + // Leak should be detected after the slow increase starts + if detected && i < startDuration+stabilizationDuration+spikeDuration+stabilizationDuration { + t.Errorf("Expected no goroutine leak, but detected one at step %d", i) + } + if !detected && i >= leakMayBeDetectedAfter && i < leakMustBeDetectedAfter { + t.Logf("Expected goroutine leak to be detected, but it was not at step %d", i) + } + if !detected && i >= leakMustBeDetectedAfter { + t.Errorf("Expected goroutine leak to be detected, but it was not at step %d", i) + } + } +} + +// Handles empty input data gracefully +func TestEmptyInputData(t *testing.T) { + stats := []int{} + detected, smoothedData := detectGoroutineLeaks(stats) + if detected || smoothedData != nil { + t.Errorf("Expected no detection and nil smoothed data for empty input") + } +} + +// Handles input data with fewer than two elements +func TestInputDataWithFewerThanTwoElements(t *testing.T) { + stats := []int{5} + detected, smoothedData := detectGoroutineLeaks(stats) + if detected || len(smoothedData) != 0 { + t.Errorf("Expected no detection and empty smoothed data for input with fewer than two elements") + } +} + +// Handles window size larger than data length +func TestWindowSizeLargerThanDataLength(t *testing.T) { + stats := []int{1, 2} + windowSize := len(stats) + 1 + smoothedData := movingAverage(stats, windowSize) + if len(smoothedData) != 1 { + t.Errorf("Expected smoothed data length of 1 when window size is larger than data length") + } +} + +// Monitors goroutine count at regular intervals +func TestGoroutinesMonitorNoLeak(t *testing.T) { + keepStatsFor := 24 * 60 * time.Millisecond + goroutinesThreshold := 100 + checkInterval := 1 * time.Millisecond + checkStatsFor := 60 * time.Millisecond + cooldownPeriod := 10 * time.Millisecond + + backupOut := logger.Out + // Create a pipe to capture log output + r, w, _ := os.Pipe() + logger.Out = w + + go func() { + goroutinesMonitor(goroutinesThreshold, checkInterval, checkStatsFor, keepStatsFor, cooldownPeriod) + }() + + var wg sync.WaitGroup + + wg.Add(1) + + // Create a goroutine to simulate the creation of goroutines + go func() { + timeStart := time.Now() + for { + if time.Since(timeStart) > 2*keepStatsFor { + break + } + // Create a goroutine + go func() { + time.Sleep(checkInterval / 2) + }() + time.Sleep(2 * checkInterval) + } + wg.Done() + }() + + wg.Wait() + + // Close the pipe + w.Close() + + // Read the log output + output, _ := io.ReadAll(r) + + // Check if the log output does not contain the detection message + // If it does, it means that the goroutine leak was detected + if strings.Contains(string(output), "leak detected") { + t.Errorf("Expected no goroutine leak to be detected") + } + + logger.Out = backupOut + +} + +func TestGoroutinesMonitorLeak(t *testing.T) { + keepStatsFor := 24 * 60 * time.Millisecond + goroutinesThreshold := 100 + checkInterval := 1 * time.Millisecond + checkStatsFor := 60 * time.Millisecond + cooldownPeriod := 10 * time.Millisecond + + backupOut := logger.Out + // Create a pipe to capture log output + r, w, _ := os.Pipe() + logger.Out = w + + go func() { + goroutinesMonitor(goroutinesThreshold, checkInterval, checkStatsFor, keepStatsFor, cooldownPeriod) + }() + + var wg sync.WaitGroup + + wg.Add(1) + + // Create a goroutine to simulate the creation of goroutines + go func() { + timeStart := time.Now() + for { + if time.Since(timeStart) > 2*keepStatsFor { + break + } + // Create a goroutine + go func() { + time.Sleep(checkInterval * 100) + }() + time.Sleep(checkInterval / 2) + } + wg.Done() + }() + + wg.Wait() + + // Close the pipe + _ = w.Close() + + logger.Out = backupOut + + // Read the log output + output, _ := io.ReadAll(r) + + // Check if the log output contains the expected message + if !strings.Contains(string(output), "leak detected") { + t.Errorf("Expected log output to contain 'leak detected'") + } + +}