diff --git a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java index c413dc5e7d76..119f56a613da 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java @@ -66,6 +66,9 @@ public enum ConfigServerMetrics implements VespaMetrics { CLUSTER_LOAD_IDEAL_CPU("cluster.load.ideal.cpu", Unit.FRACTION, "The ideal cpu load of a certain cluster"), CLUSTER_LOAD_IDEAL_MEMORY("cluster.load.ideal.memory", Unit.FRACTION, "The ideal memory load of a certain cluster"), CLUSTER_LOAD_IDEAL_DISK("cluster.load.ideal.disk", Unit.FRACTION, "The ideal disk load of a certain cluster"), + CLUSTER_LOAD_PEAK_CPU("cluster.load.peak.cpu", Unit.FRACTION, "The peak cpu load in the period considered of a certain cluster"), + CLUSTER_LOAD_PEAK_MEMORY("cluster.load.peak.memory", Unit.FRACTION, "The peak memory load in the period considered of a certain cluster"), + CLUSTER_LOAD_PEAK_DISK("cluster.load.peak.disk", Unit.FRACTION, "The peak disk load in the period considered of a certain cluster"), ZONE_WORKING("zone.working", Unit.BINARY, "The value 1 if zone is considered healthy, 0 if not. This is decided by considering the number of non-active nodes vs the number of active nodes in a zone"), CACHE_NODE_OBJECT_HIT_RATE("cache.nodeObject.hitRate", Unit.FRACTION, "The fraction of cache hits vs cache lookups for the node object cache"), diff --git a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java index 2b9bc0b64227..ce3cdda7a114 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/InfrastructureMetricSet.java @@ -67,6 +67,9 @@ private static Set getConfigServerMetrics() { addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.max()); addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.max()); addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_PEAK_CPU.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_PEAK_MEMORY.max()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_PEAK_DISK.max()); addMetric(metrics, ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.max()); addMetric(metrics, ConfigServerMetrics.NODES_EXPIRED_DEPROVISIONED.count()); addMetric(metrics, ConfigServerMetrics.NODES_EXPIRED_DIRTY.count()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index a2cb27246c68..cf7e0cf8fa3a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -143,6 +143,9 @@ private void updateClusterCostMetrics(ClusterId clusterId, metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.baseName(), cluster.get().target().ideal().cpu(), context); metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.baseName(), cluster.get().target().ideal().memory(), context); metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.baseName(), cluster.get().target().ideal().disk(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_PEAK_CPU.baseName(), cluster.get().target().peak().cpu(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_PEAK_MEMORY.baseName(), cluster.get().target().peak().memory(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_PEAK_DISK.baseName(), cluster.get().target().peak().disk(), context); } private void updateZoneMetrics() {