From becda9ef07a65d197b7adf5554c5d8821a07b1c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Rold=C3=A1n=20Betancort?= Date: Tue, 27 Feb 2024 09:27:34 +0000 Subject: [PATCH] enables exemplars prometheus support enabling exemplars makes possible to correlate traces with a metric. Clients scraping will be able to do content negotiation and request OpenMetrics format. This change is not innocuous: prometheus defaults to scraping with OpenMetrics format, and as a result the "le" labels of histograms will have a trailing zero. From the prometheus/client_golang docs // If true, the experimental OpenMetrics encoding is added to the // possible options during content negotiation. Note that Prometheus // 2.5.0+ will negotiate OpenMetrics as first priority. OpenMetrics is // the only way to transmit exemplars. However, the move to OpenMetrics // is not completely transparent. Most notably, the values of "quantile" // labels of Summaries and "le" labels of Histograms are formatted with // a trailing ".0" if they would otherwise look like integer numbers // (which changes the identity of the resulting series on the Prometheus // server). --- go.mod | 2 +- go.sum | 4 ++-- pkg/cmd/devtools.go | 3 +-- pkg/cmd/server/defaults.go | 46 +++++++++++++++++++++++++++++++++----- pkg/cmd/server/server.go | 18 --------------- 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/go.mod b/go.mod index c53089865a..39aee2b087 100644 --- a/go.mod +++ b/go.mod @@ -36,8 +36,8 @@ require ( github.com/google/go-cmp v0.6.0 github.com/google/go-github/v43 v43.0.0 github.com/google/uuid v1.5.0 + github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0 github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1 - github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 github.com/hashicorp/go-memdb v1.3.4 github.com/hashicorp/go-multierror v1.1.1 diff --git a/go.sum b/go.sum index 67f3395449..3185d8f038 100644 --- a/go.sum +++ b/go.sum @@ -462,10 +462,10 @@ github.com/gostaticanalysis/testutil v0.4.0 h1:nhdCmubdmDF6VEatUNjgUZBJKWRqugoIS github.com/gostaticanalysis/testutil v0.4.0/go.mod h1:bLIoPefWXrRi/ssLFWX1dx7Repi5x3CuviD3dgAZaBU= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0 h1:f4tggROQKKcnh4eItay6z/HbHLqghBxS8g7pyMhmDio= +github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0/go.mod h1:hKAkSgNkL0FII46ZkJcpVEAai4KV+swlIWCKfekd1pA= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1 h1:HcUWd006luQPljE73d5sk+/VgYPGUReEVz2y1/qylwY= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1/go.mod h1:w9Y7gY31krpLmrVU5ZPG9H7l9fZuRu5/3R3S3FMtVQ4= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 h1:RtRsiaGvWxcwd8y3BiRZxsylPT8hLWZ5SPcfI+3IDNk= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0/go.mod h1:TzP6duP4Py2pHLVPPQp42aoYI92+PCrVotyR5e8Vqlk= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= diff --git a/pkg/cmd/devtools.go b/pkg/cmd/devtools.go index 893669cf34..1130a54e8d 100644 --- a/pkg/cmd/devtools.go +++ b/pkg/cmd/devtools.go @@ -14,7 +14,6 @@ import ( "github.com/aws/aws-sdk-go/aws/credentials" "github.com/go-logr/zerologr" grpclog "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" - grpcprom "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/jzelinskie/cobrautil/v2" "github.com/jzelinskie/cobrautil/v2/cobragrpc" "github.com/jzelinskie/cobrautil/v2/cobrahttp" @@ -62,7 +61,7 @@ func runfunc(cmd *cobra.Command, _ []string) error { grpc.ChainUnaryInterceptor( grpclog.UnaryServerInterceptor(server.InterceptorLogger(log.Logger)), otelgrpc.UnaryServerInterceptor(), // nolint: staticcheck - grpcprom.UnaryServerInterceptor, + server.GRPCMetricsUnaryInterceptor, )) if err != nil { log.Ctx(cmd.Context()).Fatal().Err(err).Msg("failed to create gRPC server") diff --git a/pkg/cmd/server/defaults.go b/pkg/cmd/server/defaults.go index 398217467a..3d25fda5f2 100644 --- a/pkg/cmd/server/defaults.go +++ b/pkg/cmd/server/defaults.go @@ -10,9 +10,9 @@ import ( "github.com/fatih/color" "github.com/go-logr/zerologr" + grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" grpcauth "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/auth" grpclog "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" - grpcprom "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/jzelinskie/cobrautil/v2" "github.com/jzelinskie/cobrautil/v2/cobraotel" "github.com/jzelinskie/cobrautil/v2/cobrazerolog" @@ -20,6 +20,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/zerolog" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -78,7 +79,10 @@ func DefaultPreRunE(programName string) cobrautil.CobraRunFunc { func MetricsHandler(telemetryRegistry *prometheus.Registry, c *Config) http.Handler { mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) + mux.Handle("/metrics", promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{ + // Opt into OpenMetrics e.g. to support exemplars. + EnableOpenMetrics: true, + })) if telemetryRegistry != nil { mux.Handle("/telemetry", promhttp.HandlerFor(telemetryRegistry, promhttp.HandlerOpts{})) } @@ -148,6 +152,16 @@ type MiddlewareOption struct { enableResponseLog bool } +// GRPCMetricsUnaryInterceptor creates the default prometheus metrics interceptor for unary gRPCs +var GRPCMetricsUnaryInterceptor grpc.UnaryServerInterceptor + +// GRPCMetricsStreamingInterceptor creates the default prometheus metrics interceptor for streaming gRPCs +var GRPCMetricsStreamingInterceptor grpc.StreamServerInterceptor + +func init() { + GRPCMetricsUnaryInterceptor, GRPCMetricsStreamingInterceptor = createServerMetrics() +} + // DefaultUnaryMiddleware generates the default middleware chain used for the public SpiceDB Unary gRPC methods func DefaultUnaryMiddleware(opts MiddlewareOption) (*MiddlewareChain[grpc.UnaryServerInterceptor], error) { chain, err := NewMiddlewareChain([]ReferenceableMiddleware[grpc.UnaryServerInterceptor]{ @@ -173,7 +187,7 @@ func DefaultUnaryMiddleware(opts MiddlewareOption) (*MiddlewareChain[grpc.UnaryS NewUnaryMiddleware(). WithName(DefaultMiddlewareGRPCProm). - WithInterceptor(grpcprom.UnaryServerInterceptor). + WithInterceptor(GRPCMetricsUnaryInterceptor). Done(), NewUnaryMiddleware(). @@ -239,7 +253,7 @@ func DefaultStreamingMiddleware(opts MiddlewareOption) (*MiddlewareChain[grpc.St NewStreamMiddleware(). WithName(DefaultMiddlewareGRPCProm). - WithInterceptor(grpcprom.StreamServerInterceptor). + WithInterceptor(GRPCMetricsStreamingInterceptor). Done(), NewStreamMiddleware(). @@ -303,7 +317,7 @@ func DefaultDispatchMiddleware(logger zerolog.Logger, authFunc grpcauth.AuthFunc logmw.UnaryServerInterceptor(logmw.ExtractMetadataField("x-request-id", "requestID")), grpclog.UnaryServerInterceptor(InterceptorLogger(logger), defaultGRPCLogOptions...), otelgrpc.UnaryServerInterceptor(), // nolint: staticcheck - grpcprom.UnaryServerInterceptor, + GRPCMetricsUnaryInterceptor, grpcauth.UnaryServerInterceptor(authFunc), datastoremw.UnaryServerInterceptor(ds), servicespecific.UnaryServerInterceptor, @@ -312,7 +326,7 @@ func DefaultDispatchMiddleware(logger zerolog.Logger, authFunc grpcauth.AuthFunc logmw.StreamServerInterceptor(logmw.ExtractMetadataField("x-request-id", "requestID")), grpclog.StreamServerInterceptor(InterceptorLogger(logger), defaultGRPCLogOptions...), otelgrpc.StreamServerInterceptor(), // nolint: staticcheck - grpcprom.StreamServerInterceptor, + GRPCMetricsStreamingInterceptor, grpcauth.StreamServerInterceptor(authFunc), datastoremw.StreamServerInterceptor(ds), servicespecific.StreamServerInterceptor, @@ -338,3 +352,23 @@ func InterceptorLogger(l zerolog.Logger) grpclog.Logger { } }) } + +// initializes prometheus grpc interceptors with exemplar support enabled +func createServerMetrics() (grpc.UnaryServerInterceptor, grpc.StreamServerInterceptor) { + srvMetrics := grpcprom.NewServerMetrics( + grpcprom.WithServerHandlingTimeHistogram( + grpcprom.WithHistogramBuckets([]float64{.001, .003, .006, .010, .018, .024, .032, .042, .056, .075, .100, .178, .316, .562, 1, 5}), + ), + ) + + prometheus.DefaultRegisterer.MustRegister(srvMetrics) + exemplarFromContext := func(ctx context.Context) prometheus.Labels { + if span := trace.SpanContextFromContext(ctx); span.IsSampled() { + return prometheus.Labels{"traceID": span.TraceID().String()} + } + return nil + } + + exemplarContext := grpcprom.WithExemplarFromContext(exemplarFromContext) + return srvMetrics.UnaryServerInterceptor(exemplarContext), srvMetrics.StreamServerInterceptor(exemplarContext) +} diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index 563e5ce208..a3a90e45b1 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -7,7 +7,6 @@ import ( "net" "net/http" "strconv" - "sync" "time" "github.com/authzed/consistent" @@ -15,7 +14,6 @@ import ( "github.com/cespare/xxhash/v2" "github.com/ecordell/optgen/helpers" grpc_auth "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/auth" - grpcprom "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/hashicorp/go-multierror" "github.com/prometheus/client_golang/prometheus" "github.com/rs/cors" @@ -237,8 +235,6 @@ func (c *Config) Complete(ctx context.Context) (RunnableServer, error) { ds = schemacaching.NewCachingDatastoreProxy(ds, nscc, c.DatastoreConfig.GCWindow, cachingMode, c.SchemaWatchHeartbeat) closeables.AddWithError(ds.Close) - enableGRPCHistogram() - specificConcurrencyLimits := c.DispatchConcurrencyLimits concurrencyLimits := specificConcurrencyLimits.WithOverallDefaultLimit(c.GlobalDispatchConcurrencyLimit) @@ -631,17 +627,3 @@ func (c *completedServerConfig) Run(ctx context.Context) error { return nil } - -var promOnce sync.Once - -// enableGRPCHistogram enables the standard time history for gRPC requests, -// ensuring that it is only enabled once -func enableGRPCHistogram() { - // EnableHandlingTimeHistogram is not thread safe and only needs to happen - // once - promOnce.Do(func() { - grpcprom.EnableHandlingTimeHistogram(grpcprom.WithHistogramBuckets( - []float64{.006, .010, .018, .024, .032, .042, .056, .075, .100, .178, .316, .562, 1.000}, - )) - }) -}