From 719eed8640288fd1046557c5e6255d8d355520eb Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 21 Aug 2024 19:38:17 +0200 Subject: [PATCH] Skip getting metrics from new nodes --- .../autoscale/MetricsV2MetricsFetcher.java | 15 +++++++++++++-- .../autoscale/MetricsV2MetricsFetcherTest.java | 2 ++ .../maintenance/NodeMetricsDbMaintainerTest.java | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java index 748c9335c3c3..47427c58ab7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java @@ -17,6 +17,7 @@ import org.apache.hc.core5.concurrent.FutureCallback; import java.io.IOException; +import java.time.Duration; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.logging.Level; @@ -54,8 +55,11 @@ public CompletableFuture fetchMetrics(ApplicationId application NodeList applicationNodes = nodeRepository.nodes().list().owner(application).state(Node.State.active); Optional metricsV2Container = applicationNodes.container() - .matching(node -> expectedUp(node)) - .first(); + .matching(this::expectedUp) + .stream() + .filter(node -> ! 
newNode(node)) // Skip newly added nodes, as they may not be reachable + .findFirst(); + if (metricsV2Container.isEmpty()) { return CompletableFuture.completedFuture(MetricsResponse.empty()); } @@ -67,6 +71,13 @@ public CompletableFuture fetchMetrics(ApplicationId application } } + /** + * Returns true if this is a new node (oldest node history event is at most 3 minutes old) + */ + private boolean newNode(Node node) { + return node.history().age(nodeRepository.clock().instant()).compareTo(Duration.ofMinutes(3)) <= 0; + } + @Override public void deconstruct() { httpClient.close(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java index a984306b5777..f7c28225cf78 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import org.junit.Test; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CompletableFuture; @@ -37,6 +38,7 @@ public void testMetricsFetch() throws Exception { MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(tester.nodeRepository(), orchestrator, httpClient); tester.makeReadyNodes(4, resources); // Creates (in order) host-1.yahoo.com, host-2.yahoo.com, host-3.yahoo.com, host-4.yahoo.com + tester.clock().advance(Duration.ofMinutes(5)); // Make sure these are not considered new nodes (metrics will not be fetched for them) tester.activateTenantHosts(); ApplicationId application1 = ProvisioningTester.applicationId(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java index f4503ab672b0..e5bf67d0bf95 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java @@ -34,6 +34,7 @@ public void testNodeMetricsDbMaintainer() { ProvisioningTester tester = new ProvisioningTester.Builder().build(); tester.clock().setInstant(Instant.ofEpochMilli(1400)); tester.makeReadyNodes(2, resources); + tester.advanceTime(Duration.ofMinutes(5)); // Make sure these are not considered new nodes (metrics will not be fetched for them) tester.activateTenantHosts(); tester.deploy(ProvisioningTester.applicationId("test"), Capacity.from(new ClusterResources(2, 1, resources)));