From fcc1d879c8b1d2fc664cb9d90a536b59e7c8162c Mon Sep 17 00:00:00 2001
From: Tor Brede Vekterli
Date: Fri, 23 Aug 2024 12:51:49 +0200
Subject: [PATCH] Update cluster state publish time prior to updating convergence metrics

We use the time point of the last published state to infer if nodes should be
counted as not converged, but this must be done for the publish time of the
_new_ state, not the old state. Otherwise we risk over-counting.
---
 .../com/yahoo/vespa/clustercontroller/core/FleetController.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index b83b6b0deeb2..5fe4be7fbcf9 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -347,11 +347,11 @@ public void handleNewPublishedState(ClusterStateBundle stateBundle) {
         verifyInControllerThread();
         ClusterState baselineState = stateBundle.getBaselineClusterState();
         newStates.add(stateBundle);
+        systemStateBroadcaster.handleNewClusterStates(stateBundle);
         metricUpdater.updateClusterStateMetrics(cluster, baselineState,
                 ResourceUsageStats.calculateFrom(cluster.getNodeInfos(), options.clusterFeedBlockLimit(), stateBundle.getFeedBlock()),
                 systemStateBroadcaster.getLastStateBroadcastTimePoint());
         lastMetricUpdateCycleCount = cycleCount;
-        systemStateBroadcaster.handleNewClusterStates(stateBundle);
         // Iff master, always store new version in ZooKeeper _before_ publishing to any
         // nodes so that a cluster controller crash after publishing but before a successful
         // ZK store will not risk reusing the same version number.