Skip to content

Commit

Permalink
remove high cardinality labels server_route_id and server_gateway_id (#…
Browse files Browse the repository at this point in the history
…192)

* remove high cardinality labels server_route_id and server_gateway_id

Signed-off-by: Caleb Lloyd <[email protected]>

* remap route/gateway IDs to indexes

Signed-off-by: Caleb Lloyd <[email protected]>

* lint

Signed-off-by: Caleb Lloyd <[email protected]>

---------

Signed-off-by: Caleb Lloyd <[email protected]>
  • Loading branch information
caleblloyd authored Mar 8, 2024
1 parent a3f567d commit 4dcf040
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 149 deletions.
1 change: 0 additions & 1 deletion .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ jobs:
--disable gocritic \
--enable stylecheck \
--enable unconvert \
--enable gocyclo \
--enable gofmt \
--enable misspell \
--enable unparam \
Expand Down
131 changes: 2 additions & 129 deletions README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@
{
"expr": "rate(nats_core_route_sent_msg_count{server_cluster=~\"$cluster\"}[1m])",
"interval": "",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -527,7 +527,7 @@
{
"expr": "rate(nats_core_route_recv_msg_count{server_cluster=~\"$cluster\"}[1m])",
"interval": "",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -641,7 +641,7 @@
{
"expr": "rate(nats_core_route_sent_bytes{server_cluster=~\"$cluster\"}[1m])",
"interval": "",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -739,7 +739,7 @@
"targets": [
{
"expr": "rate(nats_core_route_recv_bytes{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -852,7 +852,7 @@
"targets": [
{
"expr": "nats_core_route_pending_bytes{server_cluster=~\"$cluster\"}",
"legendFormat": "{{server_name}} - {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3091,7 +3091,7 @@
"targets": [
{
"expr": "rate(nats_core_route_sent_bytes{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3178,7 +3178,7 @@
"targets": [
{
"expr": "rate(nats_core_route_recv_bytes{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3265,7 +3265,7 @@
"targets": [
{
"expr": "rate(nats_core_route_sent_msg_count{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3352,7 +3352,7 @@
"targets": [
{
"expr": "rate(nats_core_route_recv_msg_count{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3526,7 +3526,7 @@
"targets": [
{
"expr": "nats_core_route_pending_bytes{server_cluster=~\"$cluster\"}",
"legendFormat": "{{server_name}} - {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down
1 change: 0 additions & 1 deletion scripts/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ $(go env GOPATH)/bin/golangci-lint run \
--enable interfacer \
--enable unconvert \
--enable dupl \
--enable gocyclo \
--enable gofmt \
--enable goimports \
--enable misspell \
Expand Down
92 changes: 88 additions & 4 deletions surveyor/collector_statz.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"encoding/json"
"fmt"
"io"
"slices"
"sort"
"strconv"
"strings"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/nats-io/nats.go"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/exp/maps"
"golang.org/x/sync/singleflight"
)

Expand Down Expand Up @@ -139,6 +141,8 @@ type StatzCollector struct {
descs statzDescs
collectAccounts bool
natsUp *prometheus.Desc
routeIDRemap map[string]map[uint64]int
gatewayIDRemap map[string]map[uint64]int

serverLabels []string
serverInfoLabels []string
Expand Down Expand Up @@ -221,11 +225,25 @@ func (sc *StatzCollector) serverInfoLabelValues(sm *server.ServerStatsMsg) []str
}

func (sc *StatzCollector) routeLabelValues(sm *server.ServerStatsMsg, rStat *server.RouteStat) []string {
return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, strconv.FormatUint(rStat.ID, 10)}
idxS := strconv.FormatUint(rStat.ID, 10)
if byName, ok := sc.routeIDRemap[rStat.Name]; ok {
if idx, ok := byName[rStat.ID]; ok {
idxS = strconv.Itoa(idx)
}
}

return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, rStat.Name, idxS}
}

func (sc *StatzCollector) gatewayLabelValues(sm *server.ServerStatsMsg, gStat *server.GatewayStat) []string {
return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, gStat.Name, strconv.FormatUint(gStat.ID, 10)}
idxS := strconv.FormatUint(gStat.ID, 10)
if byName, ok := sc.gatewayIDRemap[gStat.Name]; ok {
if idx, ok := byName[gStat.ID]; ok {
idxS = strconv.Itoa(idx)
}
}

return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, gStat.Name, idxS}
}

// Up/Down on servers - look at discovery mechanisms in Prometheus - aging out, how does it work?
Expand Down Expand Up @@ -383,12 +401,14 @@ func NewStatzCollector(nc *nats.Conn, logger *logrus.Logger, numServers int, ser
servers: make(map[string]bool),
doneCh: make(chan struct{}, 1),
collectAccounts: accounts,
routeIDRemap: make(map[string]map[uint64]int),
gatewayIDRemap: make(map[string]map[uint64]int),

// TODO - normalize these if possible. Jetstream varies from the other server labels
serverLabels: []string{"server_cluster", "server_name", "server_id"},
serverInfoLabels: []string{"server_cluster", "server_name", "server_id", "server_version"},
routeLabels: []string{"server_cluster", "server_name", "server_id", "server_route_id"},
gatewayLabels: []string{"server_cluster", "server_name", "server_id", "server_gateway_name", "server_gateway_id"},
routeLabels: []string{"server_cluster", "server_name", "server_id", "server_route_name", "server_route_name_idx"},
gatewayLabels: []string{"server_cluster", "server_name", "server_id", "server_gateway_name", "server_gateway_name_idx"},
jsServerLabels: []string{"server_id", "server_name", "cluster_name"},
jsServerInfoLabels: []string{"server_name", "server_host", "server_id", "server_cluster", "server_domain", "server_version", "server_jetstream"},
constLabels: constLabels,
Expand Down Expand Up @@ -1013,6 +1033,14 @@ func (sc *StatzCollector) Collect(ch chan<- prometheus.Metric) {
}
}
}

pairs := make([]nameIDPair, len(sm.Stats.Routes))
for i, rs := range sm.Stats.Routes {
pairs[i].id = rs.ID
pairs[i].name = rs.Name
}
sc.routeIDRemap = remapIDToIdx(pairs, sc.routeIDRemap)

for _, rs := range sm.Stats.Routes {
labels = sc.routeLabelValues(sm, rs)
metrics.newGaugeMetric(sc.descs.RouteSentMsgs, float64(rs.Sent.Msgs), labels)
Expand All @@ -1022,6 +1050,13 @@ func (sc *StatzCollector) Collect(ch chan<- prometheus.Metric) {
metrics.newGaugeMetric(sc.descs.RoutePending, float64(rs.Pending), labels)
}

pairs = make([]nameIDPair, len(sm.Stats.Gateways))
for i, rs := range sm.Stats.Gateways {
pairs[i].id = rs.ID
pairs[i].name = rs.Name
}
sc.gatewayIDRemap = remapIDToIdx(pairs, sc.gatewayIDRemap)

for _, gw := range sm.Stats.Gateways {
labels = sc.gatewayLabelValues(sm, gw)
metrics.newGaugeMetric(sc.descs.GatewaySentMsgs, float64(gw.Sent.Msgs), labels)
Expand Down Expand Up @@ -1184,3 +1219,52 @@ func unmarshalMsg(msg *nats.Msg, v any) error {

return json.Unmarshal(data, v)
}

type nameIDPair struct {
name string
id uint64
}

func remapIDToIdx(pairs []nameIDPair, existingMapping map[string]map[uint64]int) map[string]map[uint64]int {
newMapping := make(map[string]map[uint64]int)

// give existing the same idx
for _, rs := range pairs {
newByName, ok := newMapping[rs.name]
if !ok {
newByName = make(map[uint64]int)
newMapping[rs.name] = newByName
}

existingByName, ok := existingMapping[rs.name]
if !ok {
continue
}

idx, ok := existingByName[rs.id]
if !ok {
continue
}

newByName[rs.id] = idx
}

// assign new ones new idx
for _, path := range pairs {
newByName := newMapping[path.name]
_, ok := newByName[path.id]
if ok {
continue
}

vals := maps.Values(newByName)
for i := 0; i <= len(vals); i++ {
if !slices.Contains(vals, i) {
newByName[path.id] = i
break
}
}
}

return newMapping
}
49 changes: 49 additions & 0 deletions surveyor/collector_statz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package surveyor

import (
"reflect"
"testing"
)

func TestRemapIdToIdx(t *testing.T) {
existingMapping := map[string]map[uint64]int{
"a": {
100: 0,
200: 2,
},
"b": {
100: 0,
},
}

pairs := []nameIDPair{
{name: "a", id: 200},
{name: "a", id: 100},
{name: "a", id: 300},
{name: "a", id: 400},
{name: "b", id: 200},
{name: "c", id: 200},
{name: "c", id: 100},
}

newMapping := remapIDToIdx(pairs, existingMapping)
expected := map[string]map[uint64]int{
"a": {
100: 0,
200: 2,
300: 1,
400: 3,
},
"b": {
200: 0,
},
"c": {
200: 0,
100: 1,
},
}

if !reflect.DeepEqual(expected, newMapping) {
t.Fatalf("Invalid mapping config; want: %v; got: %v", expected, newMapping)
}
}
11 changes: 7 additions & 4 deletions surveyor/surveyor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,14 @@ func TestSurveyor_Basic(t *testing.T) {
if !strings.Contains(output, "server_gateway_name") {
t.Fatalf("invalid output, missing 'server_gateway_name': %v\n", output)
}
if !strings.Contains(output, "server_gateway_id") {
t.Fatalf("invalid output, missing 'server_gateway_id': %v\n", output)
if !strings.Contains(output, "server_gateway_name_idx") {
t.Fatalf("invalid output, missing 'server_gateway_name_idx': %v\n", output)
}
if !strings.Contains(output, "server_route_id") {
t.Fatalf("invalid output, missing 'server_route_id': %v\n", output)
if !strings.Contains(output, "server_route_name") {
t.Fatalf("invalid output, missing 'server_route_name': %v\n", output)
}
if !strings.Contains(output, "server_route_name_idx") {
t.Fatalf("invalid output, missing 'server_route_name_idx': %v\n", output)
}
if !strings.Contains(output, "nats_survey_surveyed_count 3") {
t.Fatalf("invalid output, missing 'nats_survey_surveyed_count 3': %v\n", output)
Expand Down

0 comments on commit 4dcf040

Please sign in to comment.