Skip to content

Commit

Permalink
Merge pull request #31444 from vespa-engine/bratseth/test-unfulfilabl…
Browse files Browse the repository at this point in the history
…e-memory

Test that we don't rescale when container memory is unfulfiled
  • Loading branch information
mpolden authored Jun 5, 2024
2 parents 52e29fb + 66ee600 commit 31b51c7
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,10 @@ void traceQuery(String sourceName, String type, Query query, int offset, int hit
if (query.getTrace().isTraceable(level + 1) && query.getTrace().getQuery()) {
query.trace("Current state of query tree: "
+ new TextualQueryRepresentation(query.getModel().getQueryTree().getRoot()),
false, level+1);
false, level + 1);
}
if (query.getTrace().isTraceable(level + 2) && query.getTrace().getQuery()) {
query.trace("YQL+ representation: " + query.yqlRepresentation(), level+2);
query.trace("YQL+ representation: " + query.yqlRepresentation(), level + 2);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ public ClusterResources advertisedResources() {
*/
public double fulfilment() { return fulfilment; }

public boolean notFulfiled() {
return fulfilment < 0.9999999;
}

private static double fulfilment(ClusterResources realResources, ClusterResources idealResources) {
double vcpuFulfilment = Math.min(1, realResources.totalResources().vcpu() / idealResources.totalResources().vcpu());
double memoryGbFulfilment = Math.min(1, realResources.totalResources().memoryGb() / idealResources.totalResources().memoryGb());
Expand All @@ -128,7 +132,7 @@ private static double fulfilment(ClusterResources realResources, ClusterResource

public boolean preferableTo(AllocatableResources other, ClusterModel model) {
// always fulfil as much as possible unless fulfilment is considered to be equal
if (!equal(this.fulfilment(), other.fulfilment()) && (other.fulfilment() < 1 || this.fulfilment() < 1))
if ((other.fulfilment() < 1 || this.fulfilment() < 1) && ! equal(this.fulfilment(), other.fulfilment()))
return this.fulfilment() > other.fulfilment();

return this.cost() * toHours(model.allocationDuration()) + this.costChangingFrom(model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,9 @@ public Autoscaling autoscale(Application application, Cluster cluster, NodeList
return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", model);

var loadAdjustment = model.loadAdjustment();
if (enableDetailedLogging) {
if (enableDetailedLogging)
log.info("Application: " + application.id().toShortString() + ", loadAdjustment: " + loadAdjustment.toString());
}

// Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits, enableDetailedLogging);

if (target.isEmpty())
Expand All @@ -98,8 +96,8 @@ private Autoscaling toAutoscaling(AllocatableResources target, ClusterModel mode
return Autoscaling.dontScale(Status.unavailable, "Autoscaling is disabled in single node clusters", model);

if (! worthRescaling(model.current().realResources(), target.realResources())) {
if (target.fulfilment() < 0.9999999)
return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model);
if (target.notFulfiled())
return Autoscaling.dontScale(Status.insufficient, "Cluster cannot be scaled to achieve ideal load", model);
else if ( ! model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0))
return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model);
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,21 @@ public ClusterModel(NodeRepository nodeRepository,
this.at = clock.instant();
}


/**
* The central decision made in autoscaling.
*
* @return the relative load adjustment that should be made to this cluster given available measurements.
* For example, a load adjustment of 2 means we should allocate twice the amount of that resources.
*/
public Load loadAdjustment() {
if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
Load adjustment = peakLoad().divide(idealLoad());
if (! safeToScaleDown())
adjustment = adjustment.map(v -> v < 1 ? 1 : v);
return adjustment;
}

public Application application() { return application; }
public ClusterSpec clusterSpec() { return clusterSpec; }
public CloudAccount cloudAccount() { return cluster.cloudAccount().orElse(CloudAccount.empty); }
Expand All @@ -133,11 +148,7 @@ public ClusterModel(NodeRepository nodeRepository,
private ClusterTimeseries clusterTimeseries() { return clusterTimeseries; }

/** Returns the instant this model was created. */
public Instant at() { return at;}

public boolean isEmpty() {
return nodeTimeseries().isEmpty();
}
public Instant at() { return at; }

/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
Expand All @@ -148,9 +159,9 @@ public boolean isEmpty() {
*/
public Duration allocationDuration() { return allocationDuration; }

public boolean isContent() {
return clusterSpec.type().isContent();
}
public boolean isEmpty() { return nodeTimeseries().isEmpty(); }

public boolean isContent() { return clusterSpec.type().isContent(); }

/** Returns the predicted duration of data redistribution in this cluster. */
public Duration redistributionDuration() {
Expand All @@ -177,15 +188,6 @@ public boolean isExclusive() {
return nodeRepository.exclusivity().allocation(clusterSpec);
}

/** Returns the relative load adjustment that should be made to this cluster given available measurements. */
public Load loadAdjustment() {
if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
Load adjustment = peakLoad().divide(idealLoad());
if (! safeToScaleDown())
adjustment = adjustment.map(v -> v < 1 ? 1 : v);
return adjustment;
}

public boolean isStable(NodeRepository nodeRepository) {
// The cluster is processing recent changes
if (nodes.stream().anyMatch(node -> node.status().wantToRetire() ||
Expand Down Expand Up @@ -218,7 +220,6 @@ public Load loadAdjustmentWith(int nodes, int groups, Load loadAdjustment) {
.divide(redundancyAdjustment()); // correct for double redundancy adjustment
}


/**
* Returns the relative load adjustment accounting for redundancy given these nodes+groups
* relative to node nodes+groups in this.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ private static double divide(double a, double b) {

@Override
public String toString() {
return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk," + gpu + " gpu," + gpuMemory + " gpuMemory";
return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk, " + gpu + " gpu, " + gpuMemory + " gpuMemory";
}

public static Load zero() { return new Load(0, 0, 0, 0, 0); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,23 @@ public void test_container_scaling_down_exclusive() {
fixture.autoscale());
}

@Test
public void test_containers_wont_scale_up_on_memory() {
var min = new ClusterResources(2, 1, new NodeResources(4, 8, 50, 0.1));
var now = new ClusterResources(4, 1, new NodeResources(4, 8, 50, 0.1));
var max = new ClusterResources(8, 1, new NodeResources(4, 8, 50, 0.1));
var fixture = DynamicProvisioningTester.fixture()
.awsProdSetup(false)
.clusterType(ClusterSpec.Type.container)
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max))
.build();
fixture.tester().setScalingDuration(fixture.applicationId(), fixture.clusterSpec.id(), Duration.ofMinutes(5));

fixture.loader().applyLoad(new Load(0.1875, 1.0, 0.95, 0, 0), 50);
assertEquals(Autoscaling.Status.insufficient, fixture.autoscale().status());
}

@Test
public void initial_deployment_with_host_sharing_flag() {
var min = new ClusterResources(7, 1, new NodeResources(2.0, 10.0, 384.0, 0.1));
Expand Down

0 comments on commit 31b51c7

Please sign in to comment.