Skip to content

Commit

Permalink
Merge pull request #25221 from vespa-engine/ogronnesby/maintainer-exe…
Browse files Browse the repository at this point in the history
…cution-time

Create a metric for maintainer execution time
  • Loading branch information
Øyvind Grønnesby authored Dec 12, 2022
2 parents 74142ab + 4ea035b commit fc01804
Show file tree
Hide file tree
Showing 13 changed files with 35 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
Curator curator,
Duration interval,
FlagSource flagSource) {
super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, false);
super(applicationRepository, curator, flagSource, applicationRepository.clock(), interval, false);
this.applicationRepository = applicationRepository;
this.configserverConfig = applicationRepository.configserverConfig();
this.downloadDirectory = new File(Defaults.getDefaults().underVespaHome(configserverConfig.fileReferencesDir()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.yahoo.vespa.flags.ListFlag;
import com.yahoo.vespa.flags.PermanentFlags;

import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Arrays;
Expand All @@ -33,8 +34,8 @@ public abstract class ConfigServerMaintainer extends Maintainer {

/** Creates a maintainer where maintainers on different nodes in this cluster run with even delay. */
ConfigServerMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource,
Instant now, Duration interval, boolean useLock) {
super(null, interval, now, new JobControl(new JobControlFlags(curator, flagSource, useLock)),
Clock clock, Duration interval, boolean useLock) {
super(null, interval, clock, new JobControl(new JobControlFlags(curator, flagSource, useLock)),
new ConfigServerJobMetrics(applicationRepository.metric()), cluster(curator), false);
this.applicationRepository = applicationRepository;
}
Expand All @@ -48,8 +49,10 @@ public ConfigServerJobMetrics(Metric metric) {
}

@Override
public void completed(String job, double successFactor) {
metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
public void completed(String job, double successFactor, long durationMs) {
var context = metric.createContext(Map.of("job", job));
metric.set("maintenance.successFactor", successFactor, context);
metric.set("maintenance.duration", durationMs, context);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class FileDistributionMaintainer extends ConfigServerMaintainer {
Duration interval,
FlagSource flagSource,
FileDirectory fileDirectory) {
super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, false);
super(applicationRepository, curator, flagSource, applicationRepository.clock(), interval, false);
this.applicationRepository = applicationRepository;
ConfigserverConfig configserverConfig = applicationRepository.configserverConfig();
this.maxUnusedFileReferenceAge = Duration.ofMinutes(configserverConfig.keepUnusedFileReferencesMinutes());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class ReindexingMaintainer extends ConfigServerMaintainer {

public ReindexingMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource,
Duration interval, ConfigConvergenceChecker convergence, Clock clock) {
super(applicationRepository, curator, flagSource, clock.instant(), interval, true);
super(applicationRepository, curator, flagSource, clock, interval, true);
this.convergence = convergence;
this.clock = clock;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
public class SessionsMaintainer extends ConfigServerMaintainer {

SessionsMaintainer(ApplicationRepository applicationRepository, Curator curator, Duration interval, FlagSource flagSource) {
super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, true);
super(applicationRepository, curator, flagSource, applicationRepository.clock(), interval, true);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public class TenantsMaintainer extends ConfigServerMaintainer {

TenantsMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource,
Duration interval, Clock clock) {
super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, true);
super(applicationRepository, curator, flagSource, applicationRepository.clock(), interval, true);
this.ttlForUnusedTenant = defaultTtlForUnusedTenant;
this.clock = clock;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public ControllerMaintainer(Controller controller, Duration interval) {
}

public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) {
super(name, interval, controller.clock().instant(), controller.jobControl(),
super(name, interval, controller.clock(), controller.jobControl(),
new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true);
this.controller = controller;
this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems));
Expand All @@ -56,8 +56,9 @@ public ControllerJobMetrics(Metric metric) {
}

@Override
public void completed(String job, double successFactor) {
public void completed(String job, double successFactor, long durationMs) {
metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
metric.set("maintenance.duration", durationMs, metric.createContext(Map.of("job", job)));
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
private final NodeRepository nodeRepository;

public NodeRepositoryMaintainer(NodeRepository nodeRepository, Duration interval, Metric metric) {
super(null, interval, nodeRepository.clock().instant(), nodeRepository.jobControl(),
super(null, interval, nodeRepository.clock(), nodeRepository.jobControl(),
new NodeRepositoryJobMetrics(metric), nodeRepository.database().cluster(), true);
this.nodeRepository = nodeRepository;
}
Expand Down Expand Up @@ -57,8 +57,10 @@ public NodeRepositoryJobMetrics(Metric metric) {
}

@Override
public void completed(String job, double successFactor) {
metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
public void completed(String job, double successFactor, long duration) {
var context = metric.createContext(Map.of("job", job));
metric.set("maintenance.successFactor", successFactor, context);
metric.set("maintenance.duration", duration, context);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ public abstract class JobMetrics {
* Records completion of a run of a job.
* This is guaranteed to always be called once after each maintainer run.
*/
public abstract void completed(String job, double successFactor);
public abstract void completed(String job, double successFactor, long durationMs);

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
Expand Down Expand Up @@ -35,15 +36,17 @@ public abstract class Maintainer implements Runnable {
private final ScheduledExecutorService service;
private final AtomicBoolean shutDown = new AtomicBoolean();
private final boolean ignoreCollision;
private final Clock clock;

public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl,
public Maintainer(String name, Duration interval, Clock clock, JobControl jobControl,
JobMetrics jobMetrics, List<String> clusterHostnames, boolean ignoreCollision) {
this.name = name;
this.interval = requireInterval(interval);
this.jobControl = Objects.requireNonNull(jobControl);
this.jobMetrics = Objects.requireNonNull(jobMetrics);
this.ignoreCollision = ignoreCollision;
Objects.requireNonNull(startedAt);
this.clock = clock;
var startedAt = clock.instant();
Objects.requireNonNull(clusterHostnames);
Duration initialDelay = staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames)
.plus(Duration.ofSeconds(30)); // Let the system stabilize before maintenance
Expand Down Expand Up @@ -109,6 +112,7 @@ public final void lockAndMaintain(boolean force) {
log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());

double successFactor = 0;
long startTime = clock.millis();
try (var lock = jobControl.lockJob(name())) {
successFactor = maintain();
}
Expand All @@ -122,7 +126,8 @@ public final void lockAndMaintain(boolean force) {
log.log(Level.WARNING, this + " failed. Will retry in " + interval, e);
}
finally {
jobMetrics.completed(name(), successFactor);
long endTime = clock.millis();
jobMetrics.completed(name(), successFactor, endTime - startTime);
}
log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public void testJobControlMayDeactivateJobs() {
private static class NoopJobMetrics extends JobMetrics {

@Override
public void completed(String job, double successFactor) { }
public void completed(String job, double successFactor, long durationMs) { }

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ public void success_metric() {
private static class TestJobMetrics extends JobMetrics {

double successFactor = 0.0;
long durationMs = 0;

@Override
public void completed(String job, double successFactor) {
public void completed(String job, double successFactor, long durationMs) {
this.successFactor = successFactor;
this.durationMs = durationMs;
}

}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;

import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
Expand All @@ -15,7 +16,7 @@ class TestMaintainer extends Maintainer {
private RuntimeException exceptionToThrow = null;

public TestMaintainer(String name, JobControl jobControl, JobMetrics jobMetrics) {
super(name, Duration.ofDays(1), Instant.now(), jobControl, jobMetrics, List.of(), false);
super(name, Duration.ofDays(1), Clock.systemUTC(), jobControl, jobMetrics, List.of(), false);
}

public int totalRuns() {
Expand Down

0 comments on commit fc01804

Please sign in to comment.