Skip to content

Commit

Permalink
Merge pull request #19356 from vespa-engine/freva/allow-more-inplace
Browse files Browse the repository at this point in the history
Allow in-place resize for non-content nodes when decreasing resources…
  • Loading branch information
Jon Bratseth authored Sep 29, 2021
2 parents 1bc2cca + 0c26679 commit 22d3521
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ private NodeCandidate candidateFrom(Node node, boolean isSurplus) {
parent.exclusiveToApplicationId().isEmpty()
&& requestedNodes.canResize(node.resources(),
capacity.availableCapacityOf(parent),
clusterSpec.type(),
topologyChange,
currentClusterSize));
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.provisioning;

import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeResources;
Expand Down Expand Up @@ -72,7 +73,7 @@ default boolean fulfilledBy(int count) {
* in-place to resources in this spec.
*/
default boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources,
boolean hasTopologyChange, int currentClusterSize) {
ClusterSpec.Type type, boolean hasTopologyChange, int currentClusterSize) {
return false;
}

Expand Down Expand Up @@ -153,12 +154,12 @@ public boolean needsResize(Node node) {

@Override
public boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources,
boolean hasTopologyChange, int currentClusterSize) {
ClusterSpec.Type type, boolean hasTopologyChange, int currentClusterSize) {
// Never allow in-place resize when also changing topology or decreasing cluster size
if (hasTopologyChange || count < currentClusterSize) return false;

// Do not allow increasing cluster size and decreasing node resources at the same time
if (count > currentClusterSize && !requestedNodeResources.satisfies(currentNodeResources.justNumbers()))
// Do not allow increasing cluster size and decreasing node resources at the same time for content nodes
if (type.isContent() && count > currentClusterSize && !requestedNodeResources.satisfies(currentNodeResources.justNumbers()))
return false;

// Otherwise, allowed as long as the host can satisfy the new requested resources
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,17 @@
import static org.junit.Assert.fail;

/**
* If there is no change in cluster size or topology, any increase in node resource allocation is fine as long as:
* a. We have the necessary spare resources available on all the hosts used in the cluster
* b. We have the necessary spare resources available on a subset of the hosts used in the cluster AND
* also have available capacity to migrate the remaining nodes to different hosts.
* c. Any decrease in node resource allocation is fine.
* Node resources can be increased in-place if
* 1. No change to topology
* 2. No reduction to cluster size
* 3. There is enough spare capacity on the host
*
* If there is an increase in cluster size, this can be combined with an increase in resource allocations, given there are
* available resources and new nodes.
* Node resources can be decreased in-place if
* 1. No change to topology
* 2. No reduction to cluster size
* 3. For content/combined nodes: No increase to cluster size
*
* No other changes should be supported at this time, due to risks in complexity and possible unknowns.
* Specifically, the following is intentionally not supported by the above changes:
* a. Decrease in resource allocation combined with cluster size increase
* b. Change in resource allocation combined with cluster size reduction
* c. Change in resource allocation combined with cluster topology changes
* Node resources are increased if at least one of the components (vcpu, memory, disk, bandwidth) is increased.
*
* @author freva
*/
Expand Down Expand Up @@ -218,10 +215,10 @@ public void increase_size_decrease_resources() {
public void cannot_inplace_decrease_resources_while_increasing_cluster_size() {
addParentHosts(6, mediumResources.with(fast).with(local));

new PrepareHelper(tester, app).prepare(container1, 4, 1, mediumResources).activate();
assertSizeAndResources(container1, 4, new NodeResources(4, 8, 160, 1, fast, local));
new PrepareHelper(tester, app).prepare(content1, 4, 1, mediumResources).activate();
assertSizeAndResources(content1, 4, new NodeResources(4, 8, 160, 1, fast, local));

new PrepareHelper(tester, app).prepare(container1, 6, 1, smallResources);
new PrepareHelper(tester, app).prepare(content1, 6, 1, smallResources);
}

@Test(expected = OutOfCapacityException.class)
Expand Down

0 comments on commit 22d3521

Please sign in to comment.