-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add query sampling for checking #7
base: db_obs_m3coord_cache
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,10 @@ package prom | |
import ( | ||
"context" | ||
"errors" | ||
"math" | ||
"math/rand" | ||
"net/http" | ||
"sort" | ||
"sync" | ||
|
||
"github.com/m3db/m3/src/query/api/v1/handler/prometheus/handleroptions" | ||
|
@@ -84,14 +87,67 @@ var ( | |
params.Now) | ||
} | ||
} | ||
|
||
// Ratio of queries we make a check for | ||
DefaultCheckSampleRate float64 = 0.0 | ||
// Threshold in % to determine if there's difference in results (1 means 1% diff) | ||
DefaultComparePercentThreshold float64 = 1.0 | ||
) | ||
|
||
// Compares results a, b to the specified percent threshold | ||
// Results should be vectors | ||
func compareResults(a, b *promql.Result, threshold float64) bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What might cause the values to differ? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think there can be slight floating-point precision errors, especially with comparisons, so I thought a % threshold would be best. |
||
if a.Value.Type() != parser.ValueTypeVector || b.Value.Type() != parser.ValueTypeVector { | ||
return false | ||
} | ||
v1 := a.Value.(promql.Vector) | ||
v2 := b.Value.(promql.Vector) | ||
|
||
if len(v1) != len(v2) { | ||
return false | ||
} else { | ||
sort.Slice(v1, func(i, j int) bool { | ||
return v1[i].Metric.String() < v1[j].Metric.String() | ||
}) | ||
sort.Slice(v2, func(i, j int) bool { | ||
return v2[i].Metric.String() < v2[j].Metric.String() | ||
}) | ||
|
||
for i := range v1 { | ||
if v1[i].Metric.String() != v2[i].Metric.String() { | ||
return false | ||
} | ||
if v1[i].Point.V == 0 && v2[i].Point.V != 0 { | ||
return false | ||
} | ||
percent_diff := math.Abs(v1[i].Point.V-v2[i].Point.V) / v1[i].Point.V * 100 | ||
if percent_diff > threshold { | ||
return false | ||
} | ||
} | ||
} | ||
return true | ||
} | ||
|
||
type queryCheckMetrics struct { | ||
queryCheckMismatch tally.Counter | ||
queryCheckTotal tally.Counter | ||
} | ||
|
||
type queryCheckConfig struct { | ||
CheckSampleRate float64 | ||
ComparePercentThreshold float64 | ||
} | ||
|
||
type readHandler struct { | ||
hOpts options.HandlerOptions | ||
scope tally.Scope | ||
logger *zap.Logger | ||
opts opts | ||
returnedDataMetrics native.PromReadReturnedDataMetrics | ||
|
||
queryCheckConfig queryCheckConfig | ||
queryCheckMetrics queryCheckMetrics | ||
} | ||
|
||
func newReadHandler( | ||
|
@@ -101,12 +157,29 @@ func newReadHandler( | |
scope := hOpts.InstrumentOpts().MetricsScope().Tagged( | ||
map[string]string{"handler": "prometheus-read"}, | ||
) | ||
queryCheckMetrics := queryCheckMetrics{ | ||
queryCheckMismatch: scope.Counter("query_check_mismatches"), | ||
queryCheckTotal: scope.Counter("query_check_total"), | ||
} | ||
checkSampleRate := DefaultCheckSampleRate | ||
comparePercentThreshold := DefaultComparePercentThreshold | ||
// If specified, use config, otherwise default | ||
if hOpts.Config().RedisCacheSpec != nil { | ||
checkSampleRate = hOpts.Config().RedisCacheSpec.CheckSampleRate | ||
comparePercentThreshold = hOpts.Config().RedisCacheSpec.ComparePercentThreshold | ||
} | ||
queryCheckConfig := queryCheckConfig{ | ||
CheckSampleRate: checkSampleRate, | ||
ComparePercentThreshold: comparePercentThreshold, | ||
} | ||
return &readHandler{ | ||
hOpts: hOpts, | ||
opts: options, | ||
scope: scope, | ||
logger: hOpts.InstrumentOpts().Logger(), | ||
returnedDataMetrics: native.NewPromReadReturnedDataMetrics(scope), | ||
queryCheckMetrics: queryCheckMetrics, | ||
queryCheckConfig: queryCheckConfig, | ||
}, nil | ||
} | ||
|
||
|
@@ -149,6 +222,7 @@ func (h *readHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { | |
defer qry.Close() | ||
|
||
res := qry.Exec(ctx) | ||
// h.logger.Info("final result", zap.String("result", res.Value.String()), zap.Float64("base", CheckSampleRate)) | ||
if res.Err != nil { | ||
h.logger.Error("error executing query", | ||
zap.Error(res.Err), zap.String("query", params.Query), | ||
|
@@ -177,6 +251,27 @@ func (h *readHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { | |
return | ||
} | ||
|
||
// Rulemanager results are vector values (list of metric + value) | ||
// Take a random number and check if under rate so we check a proportion of the queries | ||
if rand.Float64() < float64(h.queryCheckConfig.CheckSampleRate) && res.Value.Type() == parser.ValueTypeVector { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not a big deal, but do we need float64? Why not float32? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we move this check into |
||
query, err := h.opts.newQueryFn(params) | ||
if err != nil { | ||
h.logger.Error("Comparison query failed to create") | ||
} | ||
defer query.Close() | ||
// Set context so we can default to M3DB later on | ||
result := query.Exec(context.WithValue(ctx, "UseM3DB", true)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Rename this to m3dbQueryResult to avoid confusion between |
||
if result.Err != nil { | ||
h.logger.Error("Comparison query failed to execute") | ||
} else { | ||
if result != nil && !compareResults(res, result, h.queryCheckConfig.ComparePercentThreshold) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do you know this |
||
h.queryCheckMetrics.queryCheckMismatch.Inc(1) | ||
h.logger.Info("mismatch", zap.String("query", qry.String())) | ||
} | ||
h.queryCheckMetrics.queryCheckTotal.Inc(1) | ||
} | ||
} | ||
|
||
for _, warn := range resultMetadata.Warnings { | ||
res.Warnings = append(res.Warnings, errors.New(warn.Message)) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
1% difference across all buckets?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It checks the final aggregated result, so it's a 1% difference in the final result.