Skip to content

Commit

Permalink
refactor: redistribute checks into smaller packages (#65)
Browse files Browse the repository at this point in the history
* feat: match-up health check with latency; add additional health configs

* feat: add check base struct to latency check

* refactor: redistribute checks into smaller dedicated packages

* chore: licensing

* refactor: mv oapi schema builder to oapi package

* refactor: rename checks.config package

* chore: rename checks/specs > checks/types

* chore: simplify check naming

* chore: mv oapi helper back to checks package

* chore: more check naming

---------

Signed-off-by: Bruno Bressi <[email protected]>
Co-authored-by: Bruno Bressi <[email protected]>
  • Loading branch information
lvlcn-t and puffitos authored Jan 17, 2024
1 parent d2de615 commit c4602b2
Show file tree
Hide file tree
Showing 25 changed files with 398 additions and 314 deletions.
58 changes: 3 additions & 55 deletions pkg/checks/checks.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// sparrow
// (C) 2023, Deutsche Telekom IT GmbH
// (C) 2024, Deutsche Telekom IT GmbH
//
// Deutsche Telekom IT GmbH and all other contributors /
// copyright owners license this file to you under the Apache
Expand All @@ -20,29 +20,12 @@ package checks

import (
"context"
"sync"
"time"

"github.com/getkin/kin-openapi/openapi3"
"github.com/prometheus/client_golang/prometheus"

"github.com/caas-team/sparrow/internal/helper"
"github.com/caas-team/sparrow/pkg/api"
)

var (
// RegisteredChecks will be registered in this map
// The key is the name of the Check
// The name needs to map the configuration item key
RegisteredChecks = map[string]func() Check{
"health": NewHealthCheck,
"latency": NewLatencyCheck,
}
// DefaultRetry provides a default configuration for the retry mechanism
DefaultRetry = helper.RetryConfig{
Count: 3,
Delay: time.Second,
}
"github.com/caas-team/sparrow/pkg/checks/types"
)

// Check implementations are expected to perform specific monitoring tasks and report results.
Expand All @@ -56,7 +39,7 @@ type Check interface {
// Startup is called once when the check is registered
// In the Run() method, the check should send results to the cResult channel
// this will cause sparrow to update its data store with the results
Startup(ctx context.Context, cResult chan<- Result) error
Startup(ctx context.Context, cResult chan<- types.Result) error
// Shutdown is called once when the check is unregistered or sparrow shuts down
Shutdown(ctx context.Context) error
// SetConfig is called once when the check is registered
Expand All @@ -72,38 +55,3 @@ type Check interface {
// GetMetricCollectors allows the check to provide prometheus metric collectors
GetMetricCollectors() []prometheus.Collector
}

// CheckBase is a struct providing common fields used by implementations of the Check interface.
// It serves as a foundational structure that should be embedded in specific check implementations.
// All fields are unexported, so they can only be accessed from within the package that declares CheckBase.
type CheckBase struct {
// mu is a mutex for thread-safe access to shared resources within the check implementation
mu sync.Mutex
// cResult is the send-only channel for passing check results back to the Sparrow; must be utilized by Check implementations
cResult chan<- Result
// done is the signal channel used to notify about shutdown of a check
done chan bool
}

// Result encapsulates the outcome of a check run.
// It is serialized to JSON under the keys "data", "timestamp" and "error".
type Result struct {
// Data contains performance metrics about the check run
Data any `json:"data"`
// Timestamp is the UTC time the check was run
// NOTE(review): nothing in this type enforces UTC; confirm that producers normalize the time zone
Timestamp time.Time `json:"timestamp"`
// Err should be the empty string if the check ran successfully, indicating the check is "healthy"
// (the field is a string, so it can never be nil);
// if the check failed, this should be an error message that will be logged and returned to an API user
Err string `json:"error"`
}

// GlobalTarget includes the basic information regarding
// other Sparrow instances, which this Sparrow can communicate with.
type GlobalTarget struct {
// Url holds the URL of the other Sparrow instance; serialized under the JSON key "url"
// NOTE(review): presumably the instance's base address — confirm against the code that fills it
Url string `json:"url"`
// LastSeen is serialized under the JSON key "lastSeen"
// NOTE(review): presumably the time the instance was last observed — confirm against the code that sets it
LastSeen time.Time `json:"lastSeen"`
}

// ResultDTO is a data transfer object used to associate a check's name with its result.
// Neither field carries a struct tag.
type ResultDTO struct {
// Name is the name of the check the result belongs to
Name string
// Result points to the result associated with Name; as a pointer it can be nil
// NOTE(review): whether a nil Result is ever passed is not visible here — confirm against callers
Result *Result
}
15 changes: 8 additions & 7 deletions pkg/checks/checks_moq.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pkg/checks/errors.go → pkg/checks/errors/errors.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// sparrow
// (C) 2023, Deutsche Telekom IT GmbH
// (C) 2024, Deutsche Telekom IT GmbH
//
// Deutsche Telekom IT GmbH and all other contributors /
// copyright owners license this file to you under the Apache
Expand All @@ -16,7 +16,7 @@
// specific language governing permissions and limitations
// under the License.

package checks
package errors

import "errors"

Expand Down
75 changes: 39 additions & 36 deletions pkg/checks/health.go → pkg/checks/health/health.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// sparrow
// (C) 2023, Deutsche Telekom IT GmbH
// (C) 2024, Deutsche Telekom IT GmbH
//
// Deutsche Telekom IT GmbH and all other contributors /
// copyright owners license this file to you under the Apache
Expand All @@ -16,7 +16,7 @@
// specific language governing permissions and limitations
// under the License.

package checks
package health

import (
"context"
Expand All @@ -29,52 +29,55 @@ import (
"github.com/caas-team/sparrow/internal/helper"
"github.com/caas-team/sparrow/internal/logger"
"github.com/caas-team/sparrow/pkg/api"
"github.com/caas-team/sparrow/pkg/checks"
"github.com/caas-team/sparrow/pkg/checks/errors"
"github.com/caas-team/sparrow/pkg/checks/types"
"github.com/getkin/kin-openapi/openapi3"
"github.com/prometheus/client_golang/prometheus"
)

var (
_ Check = (*Health)(nil)
stateMapping = map[int]string{
_ checks.Check = (*Health)(nil)
stateMapping = map[int]string{
0: "unhealthy",
1: "healthy",
}
)

// Health is a check that measures the availability of an endpoint
type Health struct {
CheckBase
types.CheckBase
route string
config HealthConfig
metrics healthMetrics
config config
metrics metrics
}

// NewHealthCheck creates a new instance of the health check
func NewHealthCheck() Check {
// NewCheck creates a new instance of the health check
func NewCheck() checks.Check {
return &Health{
CheckBase: CheckBase{
mu: sync.Mutex{},
cResult: nil,
done: make(chan bool, 1),
CheckBase: types.CheckBase{
Mu: sync.Mutex{},
CResult: nil,
Done: make(chan bool, 1),
},
route: "health",
config: HealthConfig{
Retry: DefaultRetry,
config: config{
Retry: types.DefaultRetry,
},
metrics: newHealthMetrics(),
metrics: newMetrics(),
}
}

// HealthConfig defines the configuration parameters for a health check
type HealthConfig struct {
// config defines the configuration parameters for a health check
type config struct {
Targets []string `json:"targets,omitempty" yaml:"targets,omitempty" mapstructure:"targets"`
Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"`
Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"`
Retry helper.RetryConfig `json:"retry" yaml:"retry" mapstructure:"retry"`
}

// healthMetrics contains the metric collectors for the Health check
type healthMetrics struct {
// metrics contains the metric collectors for the Health check
type metrics struct {
*prometheus.GaugeVec
}

Expand All @@ -90,53 +93,53 @@ func (h *Health) Run(ctx context.Context) error {
case <-ctx.Done():
log.Error("Context canceled", "err", ctx.Err())
return ctx.Err()
case <-h.done:
case <-h.Done:
log.Debug("Soft shut down")
return nil
case <-time.After(h.config.Interval):
res := h.check(ctx)
errval := ""
r := Result{
r := types.Result{
Data: res,
Err: errval,
Timestamp: time.Now(),
}

h.cResult <- r
h.CResult <- r
log.Debug("Successfully finished health check run")
}
}
}

// Startup is called once when the health check is registered
func (h *Health) Startup(ctx context.Context, cResult chan<- Result) error {
func (h *Health) Startup(ctx context.Context, cResult chan<- types.Result) error {
log := logger.FromContext(ctx).WithGroup("health")
log.Debug("Initializing health check")

h.cResult = cResult
h.CResult = cResult
return nil
}

// Shutdown is called once when the check is unregistered or sparrow shuts down
func (h *Health) Shutdown(_ context.Context) error {
h.done <- true
close(h.done)
h.Done <- true
close(h.Done)

return nil
}

// SetConfig sets the configuration for the health check
func (h *Health) SetConfig(ctx context.Context, config any) error {
func (h *Health) SetConfig(ctx context.Context, conf any) error {
log := logger.FromContext(ctx)

c, err := helper.Decode[HealthConfig](config)
c, err := helper.Decode[config](conf)
if err != nil {
log.Error("Failed to decode health config", "error", err)
return ErrInvalidConfig
return errors.ErrInvalidConfig
}

h.mu.Lock()
defer h.mu.Unlock()
h.Mu.Lock()
defer h.Mu.Unlock()
h.config = c

return nil
Expand All @@ -145,7 +148,7 @@ func (h *Health) SetConfig(ctx context.Context, config any) error {
// Schema provides the schema of the data that will be provided
// by the health check
func (h *Health) Schema() (*openapi3.SchemaRef, error) {
return OpenapiFromPerfData[map[string]string](map[string]string{})
return checks.OpenapiFromPerfData[map[string]string](map[string]string{})
}

// RegisterHandler dynamically registers a server handler
Expand All @@ -164,9 +167,9 @@ func (h *Health) DeregisterHandler(_ context.Context, router *api.RoutingTree) {
router.Remove(http.MethodGet, h.route)
}

// NewHealthMetrics initializes metric collectors of the health check
func newHealthMetrics() healthMetrics {
return healthMetrics{
// newMetrics initializes metric collectors of the health check
func newMetrics() metrics {
return metrics{
GaugeVec: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "sparrow_health_up",
Expand Down
Loading

0 comments on commit c4602b2

Please sign in to comment.