From bfbbd13cbf629d7dff62e5ddd4aab566f2801ef0 Mon Sep 17 00:00:00 2001 From: Gennady Utkin Date: Sun, 25 Aug 2024 18:48:15 -0500 Subject: [PATCH 1/3] Fix Go version for the tests --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 591abae..00d83f2 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.20' + go-version: '1.23' - name: Install dependencies run: | go get . From dc1814883240afff65e069d50c651dcf05ea1c5f Mon Sep 17 00:00:00 2001 From: Gennady Utkin Date: Sun, 25 Aug 2024 19:17:00 -0500 Subject: [PATCH 2/3] Code cleanup --- .github/workflows/go.yml | 5 +-- checks/clusterDataNodeCount.go | 26 ++++++-------- checks/clusterHealth.go | 7 ++-- checks/clusterNodeCount.go | 13 +++---- checks/nodeCpuUsage.go | 62 ++++++++++++++++++---------------- checks/nodeDiskUsage.go | 46 +++++++++++++------------ checks/nodeHeapMemory.go | 61 +++++++++++++++++---------------- config/config.go | 26 +++++++------- go.mod | 12 ++++--- go.sum | 19 +++++++++-- helper/helper.go | 8 ++--- main.go | 5 +-- 12 files changed, 151 insertions(+), 139 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 00d83f2..2e7a108 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,9 +18,6 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.23' - - name: Install dependencies - run: | - go get . + go-version: '1.22' - name: Build run: go build -v ./... diff --git a/checks/clusterDataNodeCount.go b/checks/clusterDataNodeCount.go index 83b2609..2be9995 100644 --- a/checks/clusterDataNodeCount.go +++ b/checks/clusterDataNodeCount.go @@ -3,10 +3,11 @@ package checks import ( "encoding/json" "fmt" - "github.com/atc0005/go-nagios" "io" "nagios-es/config" "net/http" + + "github.com/atc0005/go-nagios" ) func CheckClusterDataNodeCount(c *config.Config) *nagios.Plugin { @@ -19,14 +20,8 @@ func CheckClusterDataNodeCount(c *config.Config) *nagios.Plugin { plugin.ExitStatusCode = nagios.StateCRITICALExitCode return plugin } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - plugin.ServiceOutput = "CRITICAL: Failed to read response from Elasticsearch" - plugin.ExitStatusCode = nagios.StateCRITICALExitCode - plugin.Errors = append(plugin.Errors, err) - } - }(resp.Body) + + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { @@ -42,18 +37,17 @@ func CheckClusterDataNodeCount(c *config.Config) *nagios.Plugin { return plugin } - if health.NumberOfDataNodes < c.CriticalThreshold { + switch { + case health.NumberOfDataNodes < c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Number of nodes is %d", health.NumberOfDataNodes) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if health.NumberOfDataNodes < c.WarningThreshold { + case health.NumberOfDataNodes < c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Number of nodes is %d", health.NumberOfDataNodes) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Number of nodes is %d", health.NumberOfDataNodes) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Number of nodes is %d", health.NumberOfDataNodes) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } diff --git a/checks/clusterHealth.go b/checks/clusterHealth.go index 8c90810..9e412e2 100644 --- a/checks/clusterHealth.go +++ b/checks/clusterHealth.go @@ -3,11 +3,12 @@ package checks import ( "encoding/json" "fmt" - "github.com/atc0005/go-nagios" "io" "log" "nagios-es/config" "net/http" + + "github.com/atc0005/go-nagios" ) type ClusterHealthResponse struct { @@ -61,10 +62,12 @@ func CheckClusterHealth(c *config.Config) *nagios.Plugin { {Label: "unassigned_shards", Value: fmt.Sprintf("%d", health.UnassignedShards)}, {Label: "active_shards", Value: fmt.Sprintf("%d", health.ActiveShards)}, } + if err := plugin.AddPerfData(false, pd...); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } + plugin.ExitStatusCode = nagios.StateWARNINGExitCode case "red": plugin.ServiceOutput = "CRITICAL: Cluster health is red" diff --git a/checks/clusterNodeCount.go b/checks/clusterNodeCount.go index 0a543e4..334be73 100644 --- a/checks/clusterNodeCount.go +++ b/checks/clusterNodeCount.go @@ -3,10 +3,11 @@ package checks import ( "encoding/json" "fmt" - "github.com/atc0005/go-nagios" "io" "nagios-es/config" "net/http" + + "github.com/atc0005/go-nagios" ) type ClusterNodeCountResponse struct { @@ -24,14 +25,8 @@ func CheckClusterNodeCount(c *config.Config) *nagios.Plugin { plugin.ExitStatusCode = nagios.StateCRITICALExitCode return plugin } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - plugin.ServiceOutput = "CRITICAL: Failed to read response from Elasticsearch" - plugin.ExitStatusCode = nagios.StateCRITICALExitCode - plugin.Errors = append(plugin.Errors, err) - } - }(resp.Body) + + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { diff --git a/checks/nodeCpuUsage.go b/checks/nodeCpuUsage.go index 6d02d05..b3bbef4 100644 --- a/checks/nodeCpuUsage.go +++ b/checks/nodeCpuUsage.go @@ -3,11 +3,12 @@ package checks import ( "encoding/json" "fmt" - "github.com/atc0005/go-nagios" "io" "log" "nagios-es/config" "net/http" + + "github.com/atc0005/go-nagios" ) func CheckNodeCPUUsage(c *config.Config) *nagios.Plugin { @@ -48,7 +49,7 @@ func CheckNodeCPUUsage(c *config.Config) *nagios.Plugin { for _, node := range health.Nodes { if node.IP == c.NodeIP { nodeCpuPercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.OS.CPU.Percent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -56,28 +57,29 @@ func CheckNodeCPUUsage(c *config.Config) *nagios.Plugin { Max: "100", UnitOfMeasurement: "%", } + if err := plugin.AddPerfData(false, nodeCpuPercent); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if node.OS.CPU.Percent > c.CriticalThreshold { + + switch { + case node.OS.CPU.Percent > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: CPU usage on node %s is %d%%", node.IP, node.OS.CPU.Percent) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if node.OS.CPU.Percent > c.WarningThreshold { + case node.OS.CPU.Percent > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: CPU usage on node %s is %d%%", node.IP, node.OS.CPU.Percent) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: CPU usage on node %s less then %d", node.IP, c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - - plugin.ServiceOutput = fmt.Sprintf("OK: CPU usage on node %s less then %d", node.IP, c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } + if node.Name == c.NodeName { nodeCpuPercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.OS.CPU.Percent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -86,30 +88,31 @@ func CheckNodeCPUUsage(c *config.Config) *nagios.Plugin { UnitOfMeasurement: "%", } if err := plugin.AddPerfData(false, nodeCpuPercent); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if node.OS.CPU.Percent > c.CriticalThreshold { + switch { + case node.OS.CPU.Percent > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: CPU usage on node %s is %d%%", node.Name, node.OS.CPU.Percent) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if node.OS.CPU.Percent > c.WarningThreshold { + case node.OS.CPU.Percent > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: CPU usage on node %s is %d%%", node.Name, node.OS.CPU.Percent) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: CPU usage on node %s less then %d", node.Name, c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: CPU usage on node %s less then %d", node.Name, c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } + if node.OS.CPU.Percent > maxCPU { maxCPU = node.OS.CPU.Percent } + nodeCpuPercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.OS.CPU.Percent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -121,22 +124,21 @@ func CheckNodeCPUUsage(c *config.Config) *nagios.Plugin { } if err := plugin.AddPerfData(false, pd...); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if maxCPU > c.CriticalThreshold { + + switch { + case maxCPU > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Max(CPU usage) on cluster is %d%%", maxCPU) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if maxCPU > c.WarningThreshold { + case maxCPU > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Max(CPU usage) on cluster is %d%%", maxCPU) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Max(CPU usage) on cluster less then %d", c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Max(CPU usage) on cluster less then %d", c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode - return plugin } diff --git a/checks/nodeDiskUsage.go b/checks/nodeDiskUsage.go index dd480dd..dfa9736 100644 --- a/checks/nodeDiskUsage.go +++ b/checks/nodeDiskUsage.go @@ -3,12 +3,13 @@ package checks import ( "encoding/json" "fmt" - "github.com/atc0005/go-nagios" "io" "log" "nagios-es/config" "nagios-es/helper" "net/http" + + "github.com/atc0005/go-nagios" ) func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { @@ -50,7 +51,7 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { node.FS.Total.UsedPercent = helper.CalculateDiskUsagePercentage(node.FS.Total.TotalInBytes, node.FS.Total.FreeInBytes) if node.IP == c.NodeIP { nodeDiskUsagePercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.FS.Total.UsedPercent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -58,28 +59,30 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { Max: "100", UnitOfMeasurement: "%", } + if err := plugin.AddPerfData(false, nodeDiskUsagePercent); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if node.FS.Total.UsedPercent > c.CriticalThreshold { + + switch { + case node.FS.Total.UsedPercent > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Disk usage on node %s is %d%%", node.IP, node.FS.Total.UsedPercent) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if node.FS.Total.UsedPercent > c.WarningThreshold { + case node.FS.Total.UsedPercent > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Disk usage on node %s is %d%%", node.IP, node.FS.Total.UsedPercent) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Disk usage on node %s less then %d", node.IP, c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Disk usage on node %s less then %d", node.IP, c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } + if node.Name == c.NodeName { nodeDiskUsagePercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.FS.Total.UsedPercent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -88,7 +91,7 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { UnitOfMeasurement: "%", } if err := plugin.AddPerfData(false, nodeDiskUsagePercent); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } @@ -111,7 +114,7 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { maxDiskUsage = node.FS.Total.UsedPercent } nodeDiskUsagePercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.FS.Total.UsedPercent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -123,22 +126,21 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { } if err := plugin.AddPerfData(false, pd...); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if maxDiskUsage > c.CriticalThreshold { + + switch { + case maxDiskUsage > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Max(Disk usage) on cluster is %d%%", maxDiskUsage) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if maxDiskUsage > c.WarningThreshold { + case maxDiskUsage > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Max(Disk usage) on cluster is %d%%", maxDiskUsage) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Max(Disk usage) on cluster less then %d", c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Max(Disk usage) on cluster less then %d", c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode - return plugin } diff --git a/checks/nodeHeapMemory.go b/checks/nodeHeapMemory.go index a4cf90b..3aa17ba 100644 --- a/checks/nodeHeapMemory.go +++ b/checks/nodeHeapMemory.go @@ -3,11 +3,12 @@ package checks import ( "encoding/json" "fmt" - "github.com/atc0005/go-nagios" "io" "log" "nagios-es/config" "net/http" + + "github.com/atc0005/go-nagios" ) func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { @@ -48,7 +49,7 @@ func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { for _, node := range health.Nodes { if node.IP == c.NodeIP { nodeHeapPercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.JVM.Mem.HeapUsedPercent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -56,28 +57,30 @@ func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { Max: "100", UnitOfMeasurement: "%", } + if err := plugin.AddPerfData(false, nodeHeapPercent); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if node.JVM.Mem.HeapUsedPercent > c.CriticalThreshold { + + switch { + case node.JVM.Mem.HeapUsedPercent > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Heap size on node %s is %d%%", node.IP, node.JVM.Mem.HeapUsedPercent) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if node.JVM.Mem.HeapUsedPercent > c.WarningThreshold { + case node.JVM.Mem.HeapUsedPercent > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Heap size on node %s is %d%%", node.IP, node.JVM.Mem.HeapUsedPercent) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Heap size on node %s less then %d", node.IP, c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Heap size on node %s less then %d", node.IP, c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } + if node.Name == c.NodeName { nodeHeapPercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.JVM.Mem.HeapUsedPercent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -85,31 +88,32 @@ func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { Max: "100", UnitOfMeasurement: "%", } + if err := plugin.AddPerfData(false, nodeHeapPercent); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if node.JVM.Mem.HeapUsedPercent > c.CriticalThreshold { + switch { + case node.JVM.Mem.HeapUsedPercent > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Heap size on node %s is %d%%", node.Name, node.JVM.Mem.HeapUsedPercent) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if node.JVM.Mem.HeapUsedPercent > c.WarningThreshold { + case node.JVM.Mem.HeapUsedPercent > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Heap size on node %s is %d%%", node.Name, node.JVM.Mem.HeapUsedPercent) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Heap size on node %s less then %d", node.Name, c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Heap size on node %s less then %d", node.Name, c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } if node.JVM.Mem.HeapUsedPercent > maxHeap { maxHeap = node.JVM.Mem.HeapUsedPercent } + nodeHeapPercent := nagios.PerformanceData{ - Label: fmt.Sprintf("%s", node.Name), + Label: node.Name, Value: fmt.Sprintf("%d", node.JVM.Mem.HeapUsedPercent), Warn: fmt.Sprintf("%d", c.WarningThreshold), Crit: fmt.Sprintf("%d", c.CriticalThreshold), @@ -121,22 +125,21 @@ func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { } if err := plugin.AddPerfData(false, pd...); err != nil { - log.Printf("failed to add performance data metrics: %v", err) + log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if maxHeap > c.CriticalThreshold { + + switch { + case maxHeap > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Max(Heap size) on cluster is %d%%", maxHeap) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if maxHeap > c.WarningThreshold { + case maxHeap > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Max(Heap size) on cluster is %d%%", maxHeap) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Max(Heap size) on cluster less then %d", c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Max(Heap size) on cluster less then %d", c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode - return plugin } diff --git a/config/config.go b/config/config.go index 1bd9e9d..cde3381 100644 --- a/config/config.go +++ b/config/config.go @@ -26,33 +26,31 @@ func LoadConfig() (*Config, error) { pflag.CommandLine.AddGoFlagSet(flag.CommandLine) pflag.Parse() - err := viper.BindPFlags(pflag.CommandLine) - if err != nil { + if err := viper.BindPFlags(pflag.CommandLine); err != nil { return nil, err } - err = viper.BindEnv("es_url") - if err != nil { + if err := viper.BindEnv("es_url"); err != nil { return nil, err } - err = viper.BindEnv("check") - if err != nil { + + if err := viper.BindEnv("check"); err != nil { return nil, err } - err = viper.BindEnv("node_ip") - if err != nil { + + if err := viper.BindEnv("node_ip"); err != nil { return nil, err } - err = viper.BindEnv("node_name") - if err != nil { + + if err := viper.BindEnv("node_name"); err != nil { return nil, err } - err = viper.BindEnv("w") - if err != nil { + + if err := viper.BindEnv("w"); err != nil { return nil, err } - err = viper.BindEnv("c") - if err != nil { + + if err := viper.BindEnv("c"); err != nil { return nil, err } diff --git a/go.mod b/go.mod index ddffc42..5c4d4b0 100644 --- a/go.mod +++ b/go.mod @@ -1,22 +1,24 @@ module nagios-es -go 1.23 +go 1.22 + +require ( + github.com/atc0005/go-nagios v0.16.1 + github.com/spf13/pflag v1.0.5 + github.com/spf13/viper v1.19.0 +) require ( - github.com/atc0005/go-nagios v0.16.1 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect - github.com/olorin/nagiosplugin v1.2.0 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect - github.com/spf13/viper v1.19.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect diff --git a/go.sum b/go.sum index 950ba26..fa19adf 100644 --- a/go.sum +++ b/go.sum @@ -2,19 +2,31 @@ github.com/atc0005/go-nagios v0.16.1 h1:ef0AWjY9sqWq6dhfJuXtASe7dCkVDonoZhtYvNYW github.com/atc0005/go-nagios v0.16.1/go.mod h1:NSm1HeneeyBe27BYzhC7FMx4gg3x8PddeZIMX9YZj5M= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/olorin/nagiosplugin v1.2.0 h1:WmwKsepdCY11CNwVkbSwFHHRfeb/gSktOPfjmgjt9tQ= -github.com/olorin/nagiosplugin v1.2.0/go.mod h1:/KnG+YnByojJ3D2OuoiOIzWwnN0yn3eNWLYnyVEDsHU= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= @@ -37,6 +49,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= @@ -51,6 +64,8 @@ golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/helper/helper.go b/helper/helper.go index 8be9346..6f65262 100644 --- a/helper/helper.go +++ b/helper/helper.go @@ -1,15 +1,15 @@ package helper import ( - "fmt" - "github.com/atc0005/go-nagios" ) -func ErrorUnknown(error string) { +func ErrorUnknown(errString string) { plugin := nagios.NewPlugin() + defer plugin.ReturnCheckResults() - plugin.ServiceOutput = fmt.Sprintf("UNKNOWN: %s", error) + + plugin.ServiceOutput = "UNKNOWN: " + errString plugin.ExitStatusCode = nagios.StateUNKNOWNExitCode plugin.ReturnCheckResults() } diff --git a/main.go b/main.go index 5920397..842bb25 100644 --- a/main.go +++ b/main.go @@ -11,14 +11,15 @@ import ( func main() { cfg, err := config.LoadConfig() if err != nil { - helper.ErrorUnknown("Cant load config") + helper.ErrorUnknown("Can't load config") } if cfg.ElasticsearchURL == "" { helper.ErrorUnknown("Elasticsearch URL is required") } + if cfg.Check == "" { - helper.ErrorUnknown("Check is required") + helper.ErrorUnknown("Check name is required") } var plugin *nagios.Plugin From e5c6c3fc8c6cf24ab08309e8d5c2f917f4e56b15 Mon Sep 17 00:00:00 2001 From: Gennady Utkin Date: Sun, 25 Aug 2024 19:27:23 -0500 Subject: [PATCH 3/3] Code cleanup --- checks/clusterHealth.go | 10 ++-------- checks/clusterNodeCount.go | 13 ++++++------- checks/nodeCpuUsage.go | 10 ++-------- checks/nodeDiskUsage.go | 26 +++++++++++--------------- checks/nodeHeapMemory.go | 11 +++-------- 5 files changed, 24 insertions(+), 46 deletions(-) diff --git a/checks/clusterHealth.go b/checks/clusterHealth.go index 9e412e2..799537b 100644 --- a/checks/clusterHealth.go +++ b/checks/clusterHealth.go @@ -28,14 +28,8 @@ func CheckClusterHealth(c *config.Config) *nagios.Plugin { plugin.ExitStatusCode = nagios.StateCRITICALExitCode return plugin } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - plugin.ServiceOutput = "CRITICAL: Failed to read response from Elasticsearch" - plugin.ExitStatusCode = nagios.StateCRITICALExitCode - plugin.Errors = append(plugin.Errors, err) - } - }(resp.Body) + + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { diff --git a/checks/clusterNodeCount.go b/checks/clusterNodeCount.go index 334be73..4b16fac 100644 --- a/checks/clusterNodeCount.go +++ b/checks/clusterNodeCount.go @@ -42,18 +42,17 @@ func CheckClusterNodeCount(c *config.Config) *nagios.Plugin { return plugin } - if health.NumberOfNodes < c.CriticalThreshold { + switch { + case health.NumberOfNodes < c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Number of nodes is %d", health.NumberOfNodes) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if health.NumberOfNodes < c.WarningThreshold { + case health.NumberOfNodes < c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Number of nodes is %d", health.NumberOfNodes) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Number of nodes is %d", health.NumberOfNodes) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Number of nodes is %d", health.NumberOfNodes) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } diff --git a/checks/nodeCpuUsage.go b/checks/nodeCpuUsage.go index b3bbef4..92f59d9 100644 --- a/checks/nodeCpuUsage.go +++ b/checks/nodeCpuUsage.go @@ -21,14 +21,8 @@ func CheckNodeCPUUsage(c *config.Config) *nagios.Plugin { plugin.ExitStatusCode = nagios.StateCRITICALExitCode return plugin } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - plugin.ServiceOutput = "CRITICAL: Failed to read response from Elasticsearch" - plugin.ExitStatusCode = nagios.StateCRITICALExitCode - plugin.Errors = append(plugin.Errors, err) - } - }(resp.Body) + + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { diff --git a/checks/nodeDiskUsage.go b/checks/nodeDiskUsage.go index dfa9736..9fea9c1 100644 --- a/checks/nodeDiskUsage.go +++ b/checks/nodeDiskUsage.go @@ -22,14 +22,8 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { plugin.ExitStatusCode = nagios.StateCRITICALExitCode return plugin } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - plugin.ServiceOutput = "CRITICAL: Failed to read response from Elasticsearch" - plugin.ExitStatusCode = nagios.StateCRITICALExitCode - plugin.Errors = append(plugin.Errors, err) - } - }(resp.Body) + + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { @@ -90,29 +84,31 @@ func CheckNodeDiskUsage(c *config.Config) *nagios.Plugin { Max: "100", UnitOfMeasurement: "%", } + if err := plugin.AddPerfData(false, nodeDiskUsagePercent); err != nil { log.Printf("failed to add performance data metrics: %v\n", err) plugin.Errors = append(plugin.Errors, err) } - if node.FS.Total.UsedPercent > c.CriticalThreshold { + switch { + case node.FS.Total.UsedPercent > c.CriticalThreshold: plugin.ServiceOutput = fmt.Sprintf("CRITICAL: Disk usage on node %s is %d%%", node.Name, node.FS.Total.UsedPercent) plugin.ExitStatusCode = nagios.StateCRITICALExitCode - return plugin - } - if node.FS.Total.UsedPercent > c.WarningThreshold { + case node.FS.Total.UsedPercent > c.WarningThreshold: plugin.ServiceOutput = fmt.Sprintf("WARNING: Disk usage on node %s is %d%%", node.Name, node.FS.Total.UsedPercent) plugin.ExitStatusCode = nagios.StateWARNINGExitCode - return plugin + default: + plugin.ServiceOutput = fmt.Sprintf("OK: Disk uage on node %s less then %d", node.Name, c.WarningThreshold) + plugin.ExitStatusCode = nagios.StateOKExitCode } - plugin.ServiceOutput = fmt.Sprintf("OK: Disk uage on node %s less then %d", node.Name, c.WarningThreshold) - plugin.ExitStatusCode = nagios.StateOKExitCode return plugin } + if node.FS.Total.UsedPercent > maxDiskUsage { maxDiskUsage = node.FS.Total.UsedPercent } + nodeDiskUsagePercent := nagios.PerformanceData{ Label: node.Name, Value: fmt.Sprintf("%d", node.FS.Total.UsedPercent), diff --git a/checks/nodeHeapMemory.go b/checks/nodeHeapMemory.go index 3aa17ba..8b03e68 100644 --- a/checks/nodeHeapMemory.go +++ b/checks/nodeHeapMemory.go @@ -21,14 +21,8 @@ func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { plugin.ExitStatusCode = nagios.StateCRITICALExitCode return plugin } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - plugin.ServiceOutput = "CRITICAL: Failed to read response from Elasticsearch" - plugin.ExitStatusCode = nagios.StateCRITICALExitCode - plugin.Errors = append(plugin.Errors, err) - } - }(resp.Body) + + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { @@ -108,6 +102,7 @@ func CheckNodeHeapMemory(c *config.Config) *nagios.Plugin { return plugin } + if node.JVM.Mem.HeapUsedPercent > maxHeap { maxHeap = node.JVM.Mem.HeapUsedPercent }