Merge pull request #29055 from vespa-engine/hakonhall/support-exclusive-allocation-on-non-exclusive-host

Support exclusive allocation on non-exclusive host
jonmv authored Oct 23, 2023
2 parents ebd1eec + 6f5a6e1 commit c5f0cae
Showing 11 changed files with 240 additions and 90 deletions.
@@ -77,7 +77,6 @@ public Optional<Id> combinedId() {
return combinedId;
}

-
/**
* Returns whether the physical hosts running the nodes of this application can
* also run nodes of other applications. Using exclusive nodes for containers increases security and cost.
8 changes: 7 additions & 1 deletion flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -336,11 +336,17 @@ public class Flags {

public static final UnboundBooleanFlag EXCLUSIVE_PROVISIONING = defineFeatureFlag(
"exclusive-provisioning", false,
List.of("hakonhall"), "2023-10-12", "2023-12-12",
List.of("hakonhall"), "2023-10-12", "2023-12-20",
"Whether to provision a host exclusively to an application ID only based on exclusive=\"true\" from services.xml. " +
"Enabling this will produce hosts with exclusiveTo[ApplicationId] without provisionedToApplicationId.",
"Takes immediate effect when provisioning new hosts");

+ public static final UnboundBooleanFlag MAKE_EXCLUSIVE = defineFeatureFlag(
+         "make-exclusive", false,
+         List.of("hakonhall"), "2023-10-20", "2023-12-20",
+         "Allow an exclusive allocation to a non-exclusive host, but if so, make the host exclusive.",
+         "Takes immediate effect on any following preparation of clusters");
+
public static final UnboundBooleanFlag WRITE_CONFIG_SERVER_SESSION_DATA_AS_ONE_BLOB = defineFeatureFlag(
"write-config-server-session-data-as-blob", false,
List.of("hmusum"), "2023-07-19", "2023-11-01",
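As context for the flag definitions above, here is a minimal sketch of how such a flag is typically consumed. It mirrors the bindTo/value pattern visible in the HostCapacityMaintainer changes below; the MakeExclusiveReader class is hypothetical, shown only to illustrate the pattern.

import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;

// Hypothetical consumer of the MAKE_EXCLUSIVE flag, not part of this change.
class MakeExclusiveReader {

    private final BooleanFlag makeExclusiveFlag;

    MakeExclusiveReader(FlagSource flagSource) {
        // Bind once at construction time, as HostCapacityMaintainer does below.
        this.makeExclusiveFlag = Flags.MAKE_EXCLUSIVE.bindTo(flagSource);
    }

    boolean makeExclusive() {
        // Evaluated per call, so a flag change takes effect on the next read.
        return makeExclusiveFlag.value();
    }
}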
@@ -212,6 +212,11 @@ public boolean exclusiveAllocation(ClusterSpec clusterSpec) {
( !zone().cloud().allowHostSharing() && !sharedHosts.value().isEnabled(clusterSpec.type().name()));
}
+
+ /** Whether the nodes of this cluster must be running on hosts that are specifically provisioned for the application. */
+ public boolean exclusiveProvisioning(ClusterSpec clusterSpec) {
+     return !zone.cloud().allowHostSharing() && clusterSpec.isExclusive();
+ }

/**
* Returns ACLs for the children of the given host.
*
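A short sketch of how the existing exclusiveAllocation and the new exclusiveProvisioning predicates differ, assuming nodeRepository and clusterSpec are in scope:

// Existing: must this cluster's nodes avoid sharing hosts with other applications?
boolean exclusiveAllocation = nodeRepository.exclusiveAllocation(clusterSpec);

// New: must the hosts additionally be provisioned specifically for this application?
// True only in clouds that forbid host sharing, and only for clusters declared
// exclusive (exclusive="true" in services.xml).
boolean exclusiveProvisioning = nodeRepository.exclusiveProvisioning(clusterSpec);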
@@ -63,19 +63,24 @@ public void remove(ApplicationTransaction transaction) {
db.deleteApplication(transaction);
}

+ public record Lock(Mutex mutex, ApplicationId application) implements Mutex {
+     @Override
+     public void close() { mutex.close(); }
+ }
+
/** Create a lock which provides exclusive rights to making changes to the given application */
- public Mutex lock(ApplicationId application) {
-     return db.lock(application);
+ public Lock lock(ApplicationId application) {
+     return new Lock(db.lock(application), application);
}

/** Create a lock with a timeout which provides exclusive rights to making changes to the given application */
- public Mutex lock(ApplicationId application, Duration timeout) {
-     return db.lock(application, timeout);
+ public Lock lock(ApplicationId application, Duration timeout) {
+     return new Lock(db.lock(application, timeout), application);
}

/** Create a lock which provides exclusive rights to perform a maintenance deployment */
- public Mutex lockMaintenance(ApplicationId application) {
-     return db.lockMaintenance(application);
+ public Lock lockMaintenance(ApplicationId application) {
+     return new Lock(db.lockMaintenance(application), application);
}

}
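The new Lock record preserves the Mutex semantics (close() simply delegates) while also recording which application the lock is for. A usage sketch, assuming applications, nodes, applicationId and hostsToMakeExclusive are in scope:

try (Applications.Lock lock = applications.lock(applicationId)) {
    // The lock now carries its application id, letting callees such as
    // Nodes.setExclusiveToApplicationId (shown further down) tie the write
    // to the lock holder instead of taking a separate ApplicationId argument.
    nodes.setExclusiveToApplicationId(hostsToMakeExclusive, lock);
}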
@@ -13,7 +13,9 @@
import com.yahoo.jdisc.Metric;
import com.yahoo.lang.MutableInteger;
import com.yahoo.transaction.Mutex;
+ import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FlagSource;
+ import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.ListFlag;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.flags.custom.ClusterCapacity;
@@ -59,6 +61,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {

private final HostProvisioner hostProvisioner;
private final ListFlag<ClusterCapacity> preprovisionCapacityFlag;
+ private final BooleanFlag makeExclusiveFlag;
private final ProvisioningThrottler throttler;

HostCapacityMaintainer(NodeRepository nodeRepository,
@@ -69,6 +72,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
super(nodeRepository, interval, metric);
this.hostProvisioner = hostProvisioner;
this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource);
+ this.makeExclusiveFlag = Flags.MAKE_EXCLUSIVE.bindTo(flagSource);
this.throttler = new ProvisioningThrottler(nodeRepository, metric);
}

@@ -187,14 +191,15 @@ static boolean canDeprovision(Node node) {
*/
private List<Node> provisionUntilNoDeficit(NodeList nodeList) {
List<ClusterCapacity> preprovisionCapacity = preprovisionCapacityFlag.value();
+ boolean makeExclusive = makeExclusiveFlag.value();

// Worst-case each ClusterCapacity in preprovisionCapacity will require an allocation.
int maxProvisions = preprovisionCapacity.size();

var nodesPlusProvisioned = new ArrayList<>(nodeList.asList());
for (int numProvisions = 0;; ++numProvisions) {
var nodesPlusProvisionedPlusAllocated = new ArrayList<>(nodesPlusProvisioned);
- Optional<ClusterCapacity> deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated);
+ Optional<ClusterCapacity> deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated, makeExclusive);
if (deficit.isEmpty()) {
return nodesPlusProvisionedPlusAllocated;
}
@@ -250,11 +255,12 @@ private List<Node> provisionHosts(int count, NodeResources nodeResources, Option
* @return the part of a cluster capacity it was unable to allocate, if any
*/
private Optional<ClusterCapacity> allocatePreprovisionCapacity(List<ClusterCapacity> preprovisionCapacity,
- ArrayList<Node> mutableNodes) {
+ ArrayList<Node> mutableNodes,
+ boolean makeExclusive) {
for (int clusterIndex = 0; clusterIndex < preprovisionCapacity.size(); ++clusterIndex) {
ClusterCapacity clusterCapacity = preprovisionCapacity.get(clusterIndex);
LockedNodeList allNodes = new LockedNodeList(mutableNodes, () -> {});
- List<Node> candidates = findCandidates(clusterCapacity, clusterIndex, allNodes);
+ List<Node> candidates = findCandidates(clusterCapacity, clusterIndex, allNodes, makeExclusive);
int deficit = Math.max(0, clusterCapacity.count() - candidates.size());
if (deficit > 0) {
return Optional.of(clusterCapacity.withCount(deficit));
@@ -267,7 +273,7 @@ private Optional<ClusterCapacity> allocatePreprovisionCapacity(List<ClusterCapac
return Optional.empty();
}

- private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList allNodes) {
+ private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList allNodes, boolean makeExclusive) {
NodeResources nodeResources = toNodeResources(clusterCapacity);

// We'll allocate each ClusterCapacity as a unique cluster in a dummy application
@@ -281,12 +287,16 @@ private List<Node> findCandidates(ClusterCapacity clusterCapacity, int clusterIn
NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec,
true, allocationContext, nodeRepository().nodes(), nodeRepository().resourcesCalculator(),
nodeRepository().spareCount());
- List<NodeCandidate> nodeCandidates = prioritizer.collect().stream()
-         .filter(node -> ! node.violatesExclusivity(clusterSpec,
-                                                    applicationId,
-                                                    nodeRepository().exclusiveAllocation(clusterSpec),
-                                                    nodeRepository().zone().cloud().allowHostSharing(),
-                                                    allNodes))
+ List<NodeCandidate> nodeCandidates = prioritizer.collect()
+         .stream()
+         .filter(node -> node.violatesExclusivity(clusterSpec,
+                                                  applicationId,
+                                                  nodeRepository().exclusiveAllocation(clusterSpec),
+                                                  false,
+                                                  nodeRepository().zone().cloud().allowHostSharing(),
+                                                  allNodes,
+                                                  makeExclusive)
+                 != NodeCandidate.ExclusivityViolation.YES)
.toList();
MutableInteger index = new MutableInteger(0);
return nodeCandidates
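The reworked findCandidates filter above keeps every candidate whose exclusivity check does not come back as an outright violation. A sketch of the three-way result, using the NodeCandidate.ExclusivityViolation values referenced in this diff; the parameter names are illustrative:

// Sketch only: the enum itself is defined in NodeCandidate, which is not shown here.
switch (candidate.violatesExclusivity(clusterSpec, applicationId,
                                      exclusiveAllocation, exclusiveProvisioning,
                                      allowHostSharing, allNodes, makeExclusive)) {
    case NONE -> { }                      // no conflict: usable as-is
    case PARENT_HOST_NOT_EXCLUSIVE -> { } // usable only if its host is first made exclusive
    case YES -> { }                       // hard violation: reject the candidate
}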
@@ -6,7 +6,6 @@
import com.yahoo.config.provision.ApplicationTransaction;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Flavor;
- import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.Zone;
import com.yahoo.time.TimeBudget;
@@ -224,6 +223,23 @@ public void setRemovable(NodeList nodes, boolean reusable) {
performOn(nodes, (node, mutex) -> write(node.with(node.allocation().get().removable(true, reusable)), mutex));
}

+ /** Sets the exclusiveToApplicationId field. The nodes must be tenant hosts without the field already. */
+ public void setExclusiveToApplicationId(List<Node> hosts, Applications.Lock lock) {
+     List<Node> hostsToWrite = hosts.stream()
+             .filter(host -> !host.exclusiveToApplicationId().equals(Optional.of(lock.application())))
+             .peek(host -> {
+                 if (host.type() != NodeType.host)
+                     throw new IllegalArgumentException("Unable to set " + host + " exclusive to " + lock.application() +
+                                                        ": the node is not a tenant host");
+                 if (host.exclusiveToApplicationId().isPresent())
+                     throw new IllegalArgumentException("Unable to set " + host + " exclusive to " + lock.application() +
+                                                        ": it is already set exclusive to " + host.exclusiveToApplicationId().get());
+             })
+             .map(host -> host.withExclusiveToApplicationId(lock.application()))
+             .toList();
+     write(hostsToWrite, lock);
+ }

/**
* Deactivates these nodes in a transaction and returns the nodes in the new state which will hold if the
* transaction commits.
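The new setExclusiveToApplicationId is idempotent and fail-fast. A sketch of its behavior, with host and lock hypothetical:

// Hosts already exclusive to lock.application() are filtered out up front,
// so repeating the call is a harmless no-op.
nodes.setExclusiveToApplicationId(List.of(host), lock);

// It throws IllegalArgumentException if a node is not a tenant host
// (host.type() != NodeType.host), or if the host is already exclusive
// to a different application.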
@@ -84,9 +84,10 @@ class NodeAllocation {

private final NodeRepository nodeRepository;
private final Optional<String> requiredHostFlavor;
+ private final boolean makeExclusive;

NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requested,
- Supplier<Integer> nextIndex, NodeRepository nodeRepository) {
+ Supplier<Integer> nextIndex, NodeRepository nodeRepository, boolean makeExclusive) {
this.allNodes = allNodes;
this.application = application;
this.cluster = cluster;
@@ -99,6 +100,7 @@ class NodeAllocation {
.with(FetchVector.Dimension.CLUSTER_ID, cluster.id().value())
.value())
.filter(s -> !s.isBlank());
+ this.makeExclusive = makeExclusive;
}

/**
@@ -139,9 +141,13 @@ else if ( ! saturated() && hasCompatibleResources(candidate)) {
++rejectedDueToClashingParentHost;
continue;
}
- if ( violatesExclusivity(candidate)) {
-     ++rejectedDueToExclusivity;
-     continue;
+ switch (violatesExclusivity(candidate, makeExclusive)) {
+     case PARENT_HOST_NOT_EXCLUSIVE -> candidate = candidate.withExclusiveParent(true);
+     case NONE -> {}
+     case YES -> {
+         ++rejectedDueToExclusivity;
+         continue;
+     }
}
if (candidate.wantToRetire()) {
continue;
@@ -169,7 +175,7 @@ private Retirement shouldRetire(NodeCandidate candidate, List<NodeCandidate> can
if (candidate.parent.map(node -> node.status().wantToUpgradeFlavor()).orElse(false)) return Retirement.violatesHostFlavorGeneration;
if (candidate.wantToRetire()) return Retirement.hardRequest;
if (candidate.preferToRetire() && candidate.replaceableBy(candidates)) return Retirement.softRequest;
- if (violatesExclusivity(candidate)) return Retirement.violatesExclusivity;
+ if (violatesExclusivity(candidate, makeExclusive) != NodeCandidate.ExclusivityViolation.NONE) return Retirement.violatesExclusivity;
if (requiredHostFlavor.isPresent() && ! candidate.parent.map(node -> node.flavor().name()).equals(requiredHostFlavor)) return Retirement.violatesHostFlavor;
if (candidate.violatesSpares) return Retirement.violatesSpares;
return Retirement.none;
@@ -186,18 +192,15 @@ private boolean checkForClashingParentHost() {
}

private boolean offeredNodeHasParentHostnameAlreadyAccepted(NodeCandidate candidate) {
- for (NodeCandidate acceptedNode : nodes.values()) {
-     if (acceptedNode.parentHostname().isPresent() && candidate.parentHostname().isPresent() &&
-         acceptedNode.parentHostname().get().equals(candidate.parentHostname().get())) {
-         return true;
-     }
- }
- return false;
+ if (candidate.parentHostname().isEmpty()) return false;
+ return nodes.values().stream().anyMatch(acceptedNode -> acceptedNode.parentHostname().equals(candidate.parentHostname()));
}

- private boolean violatesExclusivity(NodeCandidate candidate) {
-     return candidate.violatesExclusivity(cluster, application, nodeRepository.exclusiveAllocation(cluster),
-                                          nodeRepository.zone().cloud().allowHostSharing(), allNodes);
+ private NodeCandidate.ExclusivityViolation violatesExclusivity(NodeCandidate candidate, boolean makeExclusive) {
+     return candidate.violatesExclusivity(cluster, application,
+                                          nodeRepository.exclusiveAllocation(cluster),
+                                          nodeRepository.exclusiveProvisioning(cluster),
+                                          nodeRepository.zone().cloud().allowHostSharing(), allNodes, makeExclusive);
}

/**
@@ -378,6 +381,14 @@ NodeType nodeType() {
return requested.type();
}

+ List<Node> parentsRequiredToBeExclusive() {
+     return nodes.values()
+                 .stream()
+                 .filter(candidate -> candidate.exclusiveParent)
+                 .map(candidate -> candidate.parent.orElseThrow())
+                 .toList();
+ }
+
List<Node> finalNodes() {
GroupAssigner groupAssigner = new GroupAssigner(requested, allNodes, nodeRepository.clock());
Collection<NodeCandidate> finalNodes = groupAssigner.assignTo(nodes.values());
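Taken together with the Nodes and Applications changes above, the intended flow under the make-exclusive flag looks roughly like this; the surrounding preparer wiring is not part of this excerpt, so treat it as an assumption:

// 1. During candidate selection, a usable but not-yet-exclusive host yields
//    PARENT_HOST_NOT_EXCLUSIVE, and the candidate is marked withExclusiveParent(true).
// 2. After allocation, the marked parents are collected...
List<Node> hostsToMakeExclusive = allocation.parentsRequiredToBeExclusive();
// 3. ...and made exclusive under the application lock:
try (Applications.Lock lock = applications.lock(applicationId)) {
    nodes.setExclusiveToApplicationId(hostsToMakeExclusive, lock);
}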
