diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java index d0b4ad9e9176..bb9bf8db4f33 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java @@ -77,7 +77,6 @@ public Optional combinedId() { return combinedId; } - /** * Returns whether the physical hosts running the nodes of this application can * also run nodes of other applications. Using exclusive nodes for containers increases security and cost. diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index d02cccb28857..fb667d60ab0c 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -336,11 +336,17 @@ public class Flags { public static final UnboundBooleanFlag EXCLUSIVE_PROVISIONING = defineFeatureFlag( "exclusive-provisioning", false, - List.of("hakonhall"), "2023-10-12", "2023-12-12", + List.of("hakonhall"), "2023-10-12", "2023-12-20", "Whether to provision a host exclusively to an application ID only based on exclusive=\"true\" from services.xml. 
" + "Enabling this will produce hosts with exclusiveTo[ApplicationId] without provisionedToApplicationId.", "Takes immediate effect when provisioning new hosts"); + public static final UnboundBooleanFlag MAKE_EXCLUSIVE = defineFeatureFlag( + "make-exclusive", false, + List.of("hakonhall"), "2023-10-20", "2023-12-20", + "Allow an exclusive allocation to a non-exclusive host, but if so, make the host exclusive.", + "Takes immediate effect on any following preparation of clusters"); + public static final UnboundBooleanFlag WRITE_CONFIG_SERVER_SESSION_DATA_AS_ONE_BLOB = defineFeatureFlag( "write-config-server-session-data-as-blob", false, List.of("hmusum"), "2023-07-19", "2023-11-01", diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 83db3712c176..449e1c07bf81 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -212,6 +212,11 @@ public boolean exclusiveAllocation(ClusterSpec clusterSpec) { ( !zone().cloud().allowHostSharing() && !sharedHosts.value().isEnabled(clusterSpec.type().name())); } + /** Whether the nodes of this cluster must be running on hosts that are specifically provisioned for the application. */ + public boolean exclusiveProvisioning(ClusterSpec clusterSpec) { + return !zone.cloud().allowHostSharing() && clusterSpec.isExclusive(); + } + /** * Returns ACLs for the children of the given host. 
* diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java index 5b0180bad433..fdf4a7ae838b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java @@ -63,19 +63,24 @@ public void remove(ApplicationTransaction transaction) { db.deleteApplication(transaction); } + public record Lock(Mutex mutex, ApplicationId application) implements Mutex { + @Override + public void close() { mutex.close(); } + } + /** Create a lock which provides exclusive rights to making changes to the given application */ - public Mutex lock(ApplicationId application) { - return db.lock(application); + public Lock lock(ApplicationId application) { + return new Lock(db.lock(application), application); } /** Create a lock with a timeout which provides exclusive rights to making changes to the given application */ - public Mutex lock(ApplicationId application, Duration timeout) { - return db.lock(application, timeout); + public Lock lock(ApplicationId application, Duration timeout) { + return new Lock(db.lock(application, timeout), application); } /** Create a lock which provides exclusive rights to perform a maintenance deployment */ - public Mutex lockMaintenance(ApplicationId application) { - return db.lockMaintenance(application); + public Lock lockMaintenance(ApplicationId application) { + return new Lock(db.lockMaintenance(application), application); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index b2dde608ed29..a5135ca0e1fb 100644 --- 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -13,7 +13,9 @@ import com.yahoo.jdisc.Metric; import com.yahoo.lang.MutableInteger; import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.ListFlag; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.custom.ClusterCapacity; @@ -59,6 +61,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { private final HostProvisioner hostProvisioner; private final ListFlag preprovisionCapacityFlag; + private final BooleanFlag makeExclusiveFlag; private final ProvisioningThrottler throttler; HostCapacityMaintainer(NodeRepository nodeRepository, @@ -69,6 +72,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { super(nodeRepository, interval, metric); this.hostProvisioner = hostProvisioner; this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource); + this.makeExclusiveFlag = Flags.MAKE_EXCLUSIVE.bindTo(flagSource); this.throttler = new ProvisioningThrottler(nodeRepository, metric); } @@ -187,6 +191,7 @@ static boolean canDeprovision(Node node) { */ private List provisionUntilNoDeficit(NodeList nodeList) { List preprovisionCapacity = preprovisionCapacityFlag.value(); + boolean makeExclusive = makeExclusiveFlag.value(); // Worst-case each ClusterCapacity in preprovisionCapacity will require an allocation. 
int maxProvisions = preprovisionCapacity.size(); @@ -194,7 +199,7 @@ private List provisionUntilNoDeficit(NodeList nodeList) { var nodesPlusProvisioned = new ArrayList<>(nodeList.asList()); for (int numProvisions = 0;; ++numProvisions) { var nodesPlusProvisionedPlusAllocated = new ArrayList<>(nodesPlusProvisioned); - Optional deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated); + Optional deficit = allocatePreprovisionCapacity(preprovisionCapacity, nodesPlusProvisionedPlusAllocated, makeExclusive); if (deficit.isEmpty()) { return nodesPlusProvisionedPlusAllocated; } @@ -250,11 +255,12 @@ private List provisionHosts(int count, NodeResources nodeResources, Option * @return the part of a cluster capacity it was unable to allocate, if any */ private Optional allocatePreprovisionCapacity(List preprovisionCapacity, - ArrayList mutableNodes) { + ArrayList mutableNodes, + boolean makeExclusive) { for (int clusterIndex = 0; clusterIndex < preprovisionCapacity.size(); ++clusterIndex) { ClusterCapacity clusterCapacity = preprovisionCapacity.get(clusterIndex); LockedNodeList allNodes = new LockedNodeList(mutableNodes, () -> {}); - List candidates = findCandidates(clusterCapacity, clusterIndex, allNodes); + List candidates = findCandidates(clusterCapacity, clusterIndex, allNodes, makeExclusive); int deficit = Math.max(0, clusterCapacity.count() - candidates.size()); if (deficit > 0) { return Optional.of(clusterCapacity.withCount(deficit)); @@ -267,7 +273,7 @@ private Optional allocatePreprovisionCapacity(List findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList allNodes) { + private List findCandidates(ClusterCapacity clusterCapacity, int clusterIndex, LockedNodeList allNodes, boolean makeExclusive) { NodeResources nodeResources = toNodeResources(clusterCapacity); // We'll allocate each ClusterCapacity as a unique cluster in a dummy application @@ -281,12 +287,16 @@ private List 
findCandidates(ClusterCapacity clusterCapacity, int clusterIn NodePrioritizer prioritizer = new NodePrioritizer(allNodes, applicationId, clusterSpec, nodeSpec, true, allocationContext, nodeRepository().nodes(), nodeRepository().resourcesCalculator(), nodeRepository().spareCount()); - List nodeCandidates = prioritizer.collect().stream() - .filter(node -> ! node.violatesExclusivity(clusterSpec, - applicationId, - nodeRepository().exclusiveAllocation(clusterSpec), - nodeRepository().zone().cloud().allowHostSharing(), - allNodes)) + List nodeCandidates = prioritizer.collect() + .stream() + .filter(node -> node.violatesExclusivity(clusterSpec, + applicationId, + nodeRepository().exclusiveAllocation(clusterSpec), + false, + nodeRepository().zone().cloud().allowHostSharing(), + allNodes, + makeExclusive) + != NodeCandidate.ExclusivityViolation.YES) .toList(); MutableInteger index = new MutableInteger(0); return nodeCandidates diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index 38c1306a08aa..8a79263946f0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -6,7 +6,6 @@ import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Flavor; -import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; import com.yahoo.time.TimeBudget; @@ -224,6 +223,23 @@ public void setRemovable(NodeList nodes, boolean reusable) { performOn(nodes, (node, mutex) -> write(node.with(node.allocation().get().removable(true, reusable)), mutex)); } + /** Sets the exclusiveToApplicationId field to the application of the given lock. Hosts already exclusive to that application are skipped; all others must be tenant hosts that do not have the field set. 
*/ + public void setExclusiveToApplicationId(List hosts, Applications.Lock lock) { + List hostsToWrite = hosts.stream() + .filter(host -> !host.exclusiveToApplicationId().equals(Optional.of(lock.application()))) + .peek(host -> { + if (host.type() != NodeType.host) + throw new IllegalArgumentException("Unable to set " + host + " exclusive to " + lock.application() + + ": the node is not a tenant host"); + if (host.exclusiveToApplicationId().isPresent()) + throw new IllegalArgumentException("Unable to set " + host + " exclusive to " + lock.application() + + ": it is already set exclusive to " + host.exclusiveToApplicationId().get()); + }) + .map(host -> host.withExclusiveToApplicationId(lock.application())) + .toList(); + write(hostsToWrite, lock); + } + /** * Deactivates these nodes in a transaction and returns the nodes in the new state which will hold if the * transaction commits. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 9f6d4f159f63..0cb1eaa574c6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -84,9 +84,10 @@ class NodeAllocation { private final NodeRepository nodeRepository; private final Optional requiredHostFlavor; + private final boolean makeExclusive; NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requested, - Supplier nextIndex, NodeRepository nodeRepository) { + Supplier nextIndex, NodeRepository nodeRepository, boolean makeExclusive) { this.allNodes = allNodes; this.application = application; this.cluster = cluster; @@ -99,6 +100,7 @@ class NodeAllocation { .with(FetchVector.Dimension.CLUSTER_ID, cluster.id().value()) .value()) .filter(s -> !s.isBlank()); + this.makeExclusive = 
makeExclusive; } /** @@ -139,9 +141,13 @@ else if ( ! saturated() && hasCompatibleResources(candidate)) { ++rejectedDueToClashingParentHost; continue; } - if ( violatesExclusivity(candidate)) { - ++rejectedDueToExclusivity; - continue; + switch (violatesExclusivity(candidate, makeExclusive)) { + case PARENT_HOST_NOT_EXCLUSIVE -> candidate = candidate.withExclusiveParent(true); + case NONE -> {} + case YES -> { + ++rejectedDueToExclusivity; + continue; + } } if (candidate.wantToRetire()) { continue; @@ -169,7 +175,7 @@ private Retirement shouldRetire(NodeCandidate candidate, List can if (candidate.parent.map(node -> node.status().wantToUpgradeFlavor()).orElse(false)) return Retirement.violatesHostFlavorGeneration; if (candidate.wantToRetire()) return Retirement.hardRequest; if (candidate.preferToRetire() && candidate.replaceableBy(candidates)) return Retirement.softRequest; - if (violatesExclusivity(candidate)) return Retirement.violatesExclusivity; + if (violatesExclusivity(candidate, makeExclusive) != NodeCandidate.ExclusivityViolation.NONE) return Retirement.violatesExclusivity; if (requiredHostFlavor.isPresent() && ! 
candidate.parent.map(node -> node.flavor().name()).equals(requiredHostFlavor)) return Retirement.violatesHostFlavor; if (candidate.violatesSpares) return Retirement.violatesSpares; return Retirement.none; @@ -186,18 +192,15 @@ private boolean checkForClashingParentHost() { } private boolean offeredNodeHasParentHostnameAlreadyAccepted(NodeCandidate candidate) { - for (NodeCandidate acceptedNode : nodes.values()) { - if (acceptedNode.parentHostname().isPresent() && candidate.parentHostname().isPresent() && - acceptedNode.parentHostname().get().equals(candidate.parentHostname().get())) { - return true; - } - } - return false; + if (candidate.parentHostname().isEmpty()) return false; + return nodes.values().stream().anyMatch(acceptedNode -> acceptedNode.parentHostname().equals(candidate.parentHostname())); } - private boolean violatesExclusivity(NodeCandidate candidate) { - return candidate.violatesExclusivity(cluster, application, nodeRepository.exclusiveAllocation(cluster), - nodeRepository.zone().cloud().allowHostSharing(), allNodes); + private NodeCandidate.ExclusivityViolation violatesExclusivity(NodeCandidate candidate, boolean makeExclusive) { + return candidate.violatesExclusivity(cluster, application, + nodeRepository.exclusiveAllocation(cluster), + nodeRepository.exclusiveProvisioning(cluster), + nodeRepository.zone().cloud().allowHostSharing(), allNodes, makeExclusive); } /** @@ -378,6 +381,14 @@ NodeType nodeType() { return requested.type(); } + List parentsRequiredToBeExclusive() { + return nodes.values() + .stream() + .filter(candidate -> candidate.exclusiveParent) + .map(candidate -> candidate.parent.orElseThrow()) + .toList(); + } + List finalNodes() { GroupAssigner groupAssigner = new GroupAssigner(requested, allNodes, nodeRepository.clock()); Collection finalNodes = groupAssigner.assignTo(nodes.values()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 05aa986b9ffb..10a8460614f5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -63,7 +63,11 @@ public abstract class NodeCandidate implements Nodelike, Comparable parent, boolean violatesSpares, boolean exclusiveSwitch, boolean isSurplus, boolean isNew, boolean isResizeable) { + /** The parent host must become exclusive to the implied application */ + final boolean exclusiveParent; + + private NodeCandidate(NodeResources freeParentCapacity, Optional parent, boolean violatesSpares, boolean exclusiveSwitch, + boolean exclusiveParent, boolean isSurplus, boolean isNew, boolean isResizeable) { if (isResizeable && isNew) throw new IllegalArgumentException("A new node cannot be resizable"); @@ -71,6 +75,7 @@ private NodeCandidate(NodeResources freeParentCapacity, Optional parent, b this.parent = parent; this.violatesSpares = violatesSpares; this.exclusiveSwitch = exclusiveSwitch; + this.exclusiveParent = exclusiveParent; this.isSurplus = isSurplus; this.isNew = isNew; this.isResizable = isResizeable; @@ -99,6 +104,8 @@ private NodeCandidate(NodeResources freeParentCapacity, Optional parent, b /** Returns a copy of this with exclusive switch set to given value */ public abstract NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch); + public abstract NodeCandidate withExclusiveParent(boolean exclusiveParent); + /** * Returns the node instance of this candidate, allocating it if necessary. 
* @@ -228,7 +235,7 @@ NodeCandidate withNode(Node node) { /** Returns a copy of this with node set to given value */ NodeCandidate withNode(Node node, boolean retiredNow) { - return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, exclusiveParent, isSurplus, isNew, isResizable); } /** Returns the switch priority, based on switch exclusivity, of this compared to other */ @@ -271,7 +278,7 @@ public static NodeCandidate createChild(Node node, boolean isSurplus, boolean isNew, boolean isResizeable) { - return new ConcreteNodeCandidate(node, false, freeParentCapacity, Optional.of(parent), violatesSpares, true, isSurplus, isNew, isResizeable); + return new ConcreteNodeCandidate(node, false, freeParentCapacity, Optional.of(parent), violatesSpares, true, false, isSurplus, isNew, isResizeable); } public static NodeCandidate createNewChild(NodeResources resources, @@ -280,15 +287,15 @@ public static NodeCandidate createNewChild(NodeResources resources, boolean violatesSpares, LockedNodeList allNodes, IP.Allocation.Context ipAllocationContext) { - return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, allNodes, ipAllocationContext); + return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, false, allNodes, ipAllocationContext); } public static NodeCandidate createNewExclusiveChild(Node node, Node parent) { - return new ConcreteNodeCandidate(node, false, node.resources(), Optional.of(parent), false, true, false, true, false); + return new ConcreteNodeCandidate(node, false, node.resources(), Optional.of(parent), false, true, false, false, true, false); } public static NodeCandidate createStandalone(Node node, boolean isSurplus, boolean isNew) { - return new ConcreteNodeCandidate(node, false, 
node.resources(), Optional.empty(), false, true, isSurplus, isNew, false); + return new ConcreteNodeCandidate(node, false, node.resources(), Optional.empty(), false, true, false, isSurplus, isNew, false); } /** A candidate backed by a node */ @@ -300,9 +307,9 @@ static class ConcreteNodeCandidate extends NodeCandidate { ConcreteNodeCandidate(Node node, boolean retiredNow, NodeResources freeParentCapacity, Optional parent, - boolean violatesSpares, boolean exclusiveSwitch, + boolean violatesSpares, boolean exclusiveSwitch, boolean exclusiveParent, boolean isSurplus, boolean isNew, boolean isResizeable) { - super(freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizeable); + super(freeParentCapacity, parent, violatesSpares, exclusiveSwitch, exclusiveParent, isSurplus, isNew, isResizeable); this.retiredNow = retiredNow; this.node = Objects.requireNonNull(node, "Node cannot be null"); } @@ -340,7 +347,7 @@ static class ConcreteNodeCandidate extends NodeCandidate { @Override public NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at) { return new ConcreteNodeCandidate(node.allocate(owner, membership, requestedResources, at), retiredNow, - freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + freeParentCapacity, parent, violatesSpares, exclusiveSwitch, exclusiveParent, isSurplus, isNew, isResizable); } /** Called when the node described by this candidate must be created */ @@ -350,7 +357,13 @@ public NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, @Override public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, - isSurplus, isNew, isResizable); + exclusiveParent, isSurplus, isNew, isResizable); + } + + @Override + public NodeCandidate withExclusiveParent(boolean exclusiveParent) { + 
return new ConcreteNodeCandidate(node, retiredNow, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, + exclusiveParent, isSurplus, isNew, isResizable); } @Override @@ -391,9 +404,10 @@ private VirtualNodeCandidate(NodeResources resources, Node parent, boolean violatesSpares, boolean exclusiveSwitch, + boolean exclusiveParent, LockedNodeList allNodes, IP.Allocation.Context ipAllocationContext) { - super(freeParentCapacity, Optional.of(parent), violatesSpares, exclusiveSwitch, false, true, false); + super(freeParentCapacity, Optional.of(parent), violatesSpares, exclusiveSwitch, exclusiveParent, false, true, false); this.resources = resources; this.allNodes = allNodes; this.ipAllocationContext = ipAllocationContext; @@ -453,13 +467,18 @@ public NodeCandidate withNode() { NodeType.tenant) .cloudAccount(parent.get().cloudAccount()) .build(); - return new ConcreteNodeCandidate(node, false, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); + return new ConcreteNodeCandidate(node, false, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, exclusiveParent, isSurplus, isNew, isResizable); } @Override public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { - return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, allNodes, ipAllocationContext); + return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, exclusiveParent, allNodes, ipAllocationContext); + } + + @Override + public NodeCandidate withExclusiveParent(boolean exclusiveParent) { + return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, exclusiveParent, allNodes, ipAllocationContext); } @Override @@ -496,7 +515,7 @@ static class InvalidNodeCandidate extends NodeCandidate { private InvalidNodeCandidate(NodeResources resources, NodeResources freeParentCapacity, Node parent, String 
invalidReason) { - super(freeParentCapacity, Optional.of(parent), false, false, false, true, false); + super(freeParentCapacity, Optional.of(parent), false, false, false, false, true, false); this.resources = resources; this.invalidReason = invalidReason; } @@ -543,6 +562,11 @@ public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { return this; } + @Override + public NodeCandidate withExclusiveParent(boolean exclusiveParent) { + return this; + } + @Override public Node toNode() { throw new IllegalStateException("Candidate node on " + parent.get() + " is invalid: " + invalidReason); @@ -563,32 +587,51 @@ public String toString() { } - public boolean violatesExclusivity(ClusterSpec cluster, ApplicationId application, - boolean exclusiveCluster, boolean hostSharing, NodeList allNodes) { - if (parentHostname().isEmpty()) return false; - if (type() != NodeType.tenant) return false; - - // We always violate exclusivity if the parent is exclusive to someone else that the requesting application. - if ( ! emptyOrEqual(parent.flatMap(Node::exclusiveToApplicationId), application)) return true; + public enum ExclusivityViolation { + NONE, YES, - // In zones which do not allow host sharing, exclusivity is violated if... - if ( ! hostSharing) { - // If either the parent is dedicated to a cluster type different from this cluster - return ! emptyOrEqual(parent.flatMap(Node::exclusiveToClusterType), cluster.type()) || - // or this cluster requires exclusivity, but the host is not exclusive (to this, implicitly by the above). - exclusiveCluster && parent.flatMap(Node::exclusiveToApplicationId).isEmpty(); - } + /** No violation IF AND ONLY IF the parent host's exclusiveToApplicationId is set to this application. */ + PARENT_HOST_NOT_EXCLUSIVE + } - // In zones with shared hosts we require that if any node on the host requires exclusivity, - // then all the nodes on the host must have the same owner. 
- for (Node nodeOnHost : allNodes.childrenOf(parentHostname().get())) { - if (nodeOnHost.allocation().isEmpty()) continue; - if (exclusiveCluster || nodeOnHost.allocation().get().membership().cluster().isExclusive()) { - if ( ! nodeOnHost.allocation().get().owner().equals(application)) return true; + public ExclusivityViolation violatesExclusivity(ClusterSpec cluster, ApplicationId application, + boolean exclusiveAllocation, boolean exclusiveProvisioning, + boolean hostSharing, NodeList allNodes, boolean makeExclusive) { + if (parentHostname().isEmpty()) return ExclusivityViolation.NONE; + if (type() != NodeType.tenant) return ExclusivityViolation.NONE; + + if (hostSharing) { + // In zones with shared hosts we require that if any node on the host requires exclusivity, + // then all the nodes on the host must have the same owner. + for (Node nodeOnHost : allNodes.childrenOf(parentHostname().get())) { + if (nodeOnHost.allocation().isEmpty()) continue; + if (exclusiveAllocation || nodeOnHost.allocation().get().membership().cluster().isExclusive()) { + if ( ! nodeOnHost.allocation().get().owner().equals(application)) return ExclusivityViolation.YES; + } } + } else { + // the parent is exclusive to another cluster type + if ( ! emptyOrEqual(parent.flatMap(Node::exclusiveToClusterType), cluster.type())) + return ExclusivityViolation.YES; + + // the parent is provisioned for another application + if ( ! emptyOrEqual(parent.flatMap(Node::provisionedForApplicationId), application)) + return ExclusivityViolation.YES; + + // this cluster requires a parent that was provisioned for this application + if (exclusiveProvisioning && parent.flatMap(Node::provisionedForApplicationId).isEmpty()) + return ExclusivityViolation.YES; + + // the parent is exclusive to another application + if ( ! 
emptyOrEqual(parent.flatMap(Node::exclusiveToApplicationId), application)) + return ExclusivityViolation.YES; + + // this cluster requires exclusivity, but the parent is not exclusive + if (exclusiveAllocation && parent.flatMap(Node::exclusiveToApplicationId).isEmpty()) + return makeExclusive ? ExclusivityViolation.PARENT_HOST_NOT_EXCLUSIVE : ExclusivityViolation.YES; } - return false; + return ExclusivityViolation.NONE; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 89ff0938d596..9f1cb454fa7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -11,10 +11,14 @@ import com.yahoo.jdisc.Metric; import com.yahoo.text.internal.SnippetGenerator; import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.applicationmodel.InfrastructureApplication; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; @@ -44,12 +48,14 @@ public class Preparer { private final Optional hostProvisioner; private final Optional loadBalancerProvisioner; private final ProvisioningThrottler throttler; + private final BooleanFlag makeExclusiveFlag; public Preparer(NodeRepository nodeRepository, Optional hostProvisioner, Optional loadBalancerProvisioner, Metric metric) { this.nodeRepository = nodeRepository; 
this.hostProvisioner = hostProvisioner; this.loadBalancerProvisioner = loadBalancerProvisioner; this.throttler = new ProvisioningThrottler(nodeRepository, metric); + this.makeExclusiveFlag = Flags.MAKE_EXCLUSIVE.bindTo(nodeRepository.flagSource()); } /** @@ -69,11 +75,12 @@ public List prepare(ApplicationId application, ClusterSpec cluster, NodeSp loadBalancerProvisioner.ifPresent(provisioner -> provisioner.prepare(application, cluster, requested)); + boolean makeExclusive = makeExclusiveFlag.value(); // Try preparing in memory without global unallocated lock. Most of the time there should be no changes, // and we can return nodes previously allocated. LockedNodeList allNodes = nodeRepository.nodes().list(PROBE_LOCK); NodeIndices indices = new NodeIndices(cluster.id(), allNodes); - NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes); + NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes, makeExclusive); if (probeAllocation.fulfilledAndNoChanges()) { List acceptedNodes = probeAllocation.finalNodes(); indices.commitProbe(); @@ -81,16 +88,17 @@ public List prepare(ApplicationId application, ClusterSpec cluster, NodeSp } else { // There were some changes, so re-do the allocation with locks indices.resetProbe(); - return prepareWithLocks(application, cluster, requested, indices); + return prepareWithLocks(application, cluster, requested, indices, makeExclusive); } } /// Note that this will write to the node repo. - private List prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices) { + private List prepareWithLocks(ApplicationId application, ClusterSpec cluster, NodeSpec requested, NodeIndices indices, boolean makeExclusive) { try (Mutex lock = nodeRepository.applications().lock(application); + Applications.Lock tenantHostLock = makeExclusive ? 
nodeRepository.applications().lock(InfrastructureApplication.TENANT_HOST.id()) : null; Mutex allocationLock = nodeRepository.nodes().lockUnallocated()) { LockedNodeList allNodes = nodeRepository.nodes().list(allocationLock); - NodeAllocation allocation = prepareAllocation(application, cluster, requested, indices::next, allNodes); + NodeAllocation allocation = prepareAllocation(application, cluster, requested, indices::next, allNodes, makeExclusive); NodeType hostType = allocation.nodeType().hostType(); if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { HostSharing sharing = hostSharing(cluster, hostType); @@ -140,7 +148,7 @@ private List prepareWithLocks(ApplicationId application, ClusterSpec clust // Non-dynamically provisioned zone with a deficit because we just now retired some nodes. // Try again, but without retiring indices.resetProbe(); - List accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), indices); + List accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), indices, makeExclusive); log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity"); return accepted; } @@ -150,6 +158,11 @@ private List prepareWithLocks(ApplicationId application, ClusterSpec clust allocation.allocationFailureDetails(), true); // Carry out and return allocation + if (tenantHostLock != null) { + List exclusiveParents = allocation.parentsRequiredToBeExclusive(); + nodeRepository.nodes().setExclusiveToApplicationId(exclusiveParents, tenantHostLock); + // TODO: also update tags + } List acceptedNodes = allocation.finalNodes(); nodeRepository.nodes().reserve(allocation.reservableNodes()); nodeRepository.nodes().addReservedNodes(new LockedNodeList(allocation.newNodes(), allocationLock)); @@ -165,9 +178,9 @@ private List prepareWithLocks(ApplicationId application, ClusterSpec clust } private NodeAllocation prepareAllocation(ApplicationId application, ClusterSpec 
cluster, NodeSpec requested, - Supplier nextIndex, LockedNodeList allNodes) { + Supplier nextIndex, LockedNodeList allNodes, boolean makeExclusive) { validateAccount(requested.cloudAccount(), application, allNodes); - NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository); + NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository, makeExclusive); var allocationContext = IP.Allocation.Context.from(nodeRepository.zone().cloud().name(), requested.cloudAccount().isExclave(nodeRepository.zone()), nodeRepository.nameResolver()); @@ -208,7 +221,7 @@ private boolean canProvisionDynamically(NodeType hostType) { private HostSharing hostSharing(ClusterSpec cluster, NodeType hostType) { if ( hostType.isSharable()) - return cluster.isExclusive() ? HostSharing.provision : + return nodeRepository.exclusiveProvisioning(cluster) ? HostSharing.provision : nodeRepository.exclusiveAllocation(cluster) ? 
HostSharing.exclusive : HostSharing.any; else diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java index f1d11da6b587..c804ade668c7 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java @@ -24,9 +24,12 @@ import com.yahoo.docproc.jdisc.metric.NullMetric; import com.yahoo.net.HostName; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.custom.ClusterCapacity; +import com.yahoo.vespa.flags.custom.HostResources; +import com.yahoo.vespa.flags.custom.SharedHost; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.Node.State; @@ -279,7 +282,7 @@ public void does_not_remove_if_host_provisioner_failed() { @Test public void respects_exclusive_allocation() { - tester = new DynamicProvisioningTester(); + tester = new DynamicProvisioningTester(Cloud.builder().name(CloudName.AWS).dynamicProvisioning(true).allowHostSharing(false).build(), new MockNameResolver()); NodeResources resources1 = new NodeResources(24, 64, 100, 10); setPreprovisionCapacityFlag(tester, new ClusterCapacity(1, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), @@ -290,6 +293,7 @@ public void respects_exclusive_allocation() { resources1.bandwidthGbps(), resources1.diskSpeed().name(), resources1.storageType().name(), resources1.architecture().name(), null)); + tester.flagSource.withBooleanFlag(Flags.MAKE_EXCLUSIVE.id(), true); tester.maintain(); // Hosts are provisioned @@ -316,6 +320,44 @@ public void 
respects_exclusive_allocation() { tester.assertNodesUnchanged(); } + @Test + public void works_as_before_without_make_exclusive() { + // TODO(hakon): Remove test once make-exclusive has rolled out + tester = new DynamicProvisioningTester(Cloud.builder().name(CloudName.AWS).dynamicProvisioning(true).allowHostSharing(false).build(), new MockNameResolver()); + NodeResources resources1 = new NodeResources(24, 64, 100, 10); + setPreprovisionCapacityFlag(tester, + new ClusterCapacity(1, resources1.vcpu(), resources1.memoryGb(), resources1.diskGb(), + resources1.bandwidthGbps(), resources1.diskSpeed().name(), + resources1.storageType().name(), resources1.architecture().name(), + null)); + tester.flagSource.withJacksonFlag(PermanentFlags.SHARED_HOST.id(), + new SharedHost(List.of(new HostResources(48d, 128d, 200d, 20d, "fast", "remote", null, 4, "x86_64"))), + SharedHost.class); + tester.maintain(); + + // Hosts are provisioned + assertEquals(1, tester.provisionedHostsMatching(resources1)); + assertEquals(0, tester.hostProvisioner.deprovisionedHosts()); + assertEquals(Optional.empty(), tester.nodeRepository.nodes().node("host100").flatMap(Node::exclusiveToApplicationId)); + + // Next maintenance run does nothing + tester.assertNodesUnchanged(); + + // One host is allocated exclusively to some other application + tester.nodeRepository.nodes().write(tester.nodeRepository.nodes().node("host100").get() + .withExclusiveToApplicationId(ApplicationId.from("t", "a", "i")), + () -> { }); + + tester.maintain(); + + // New hosts are provisioned, and the empty exclusive host is deallocated + assertEquals(1, tester.provisionedHostsMatching(resources1)); + assertEquals(1, tester.hostProvisioner.deprovisionedHosts()); + + // Next maintenance run does nothing + tester.assertNodesUnchanged(); + } + @Test public void test_minimum_capacity() { tester = new DynamicProvisioningTester(); diff --git 
a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidateTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidateTest.java index ba35aa67dac7..3f5992b2a647 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidateTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidateTest.java @@ -23,17 +23,17 @@ public class NodeCandidateTest { @Test public void testOrdering() { List expected = List.of( - new NodeCandidate.ConcreteNodeCandidate(node("01", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.empty(), false, true, true, false, false), - new NodeCandidate.ConcreteNodeCandidate(node("02", Node.State.active), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, false), - new NodeCandidate.ConcreteNodeCandidate(node("04", Node.State.reserved), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, false), - new NodeCandidate.ConcreteNodeCandidate(node("03", Node.State.inactive), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, false), - new NodeCandidate.ConcreteNodeCandidate(node("05", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.active)), true, true, false, true, false), - new NodeCandidate.ConcreteNodeCandidate(node("06", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.ready)), true, true, false, true, false), - new NodeCandidate.ConcreteNodeCandidate(node("07", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.provisioned)), true, true, false, true, false), - new NodeCandidate.ConcreteNodeCandidate(node("08", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.failed)), true, true, false, true, false), - new 
NodeCandidate.ConcreteNodeCandidate(node("09", Node.State.ready), false, new NodeResources(1, 1, 1, 1), Optional.empty(), true, true, false, true, false), - new NodeCandidate.ConcreteNodeCandidate(node("10", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, true, false), - new NodeCandidate.ConcreteNodeCandidate(node("11", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, true, false) + new NodeCandidate.ConcreteNodeCandidate(node("01", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.empty(), false, true, false, true, false, false), + new NodeCandidate.ConcreteNodeCandidate(node("02", Node.State.active), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, false, false), + new NodeCandidate.ConcreteNodeCandidate(node("04", Node.State.reserved), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, false, false), + new NodeCandidate.ConcreteNodeCandidate(node("03", Node.State.inactive), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, false, false), + new NodeCandidate.ConcreteNodeCandidate(node("05", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.active)), true, true, false, false, true, false), + new NodeCandidate.ConcreteNodeCandidate(node("06", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.ready)), true, true, false, false, true, false), + new NodeCandidate.ConcreteNodeCandidate(node("07", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.provisioned)), true, true, false, false, true, false), + new NodeCandidate.ConcreteNodeCandidate(node("08", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.of(node("host1", Node.State.failed)), true, true, false, false, true, false), + new 
NodeCandidate.ConcreteNodeCandidate(node("09", Node.State.ready), false, new NodeResources(1, 1, 1, 1), Optional.empty(), true, true, false, false, true, false), + new NodeCandidate.ConcreteNodeCandidate(node("10", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, true, false), + new NodeCandidate.ConcreteNodeCandidate(node("11", Node.State.ready), false, new NodeResources(2, 2, 2, 2), Optional.empty(), true, true, false, false, true, false) ); assertOrder(expected); } @@ -148,7 +148,7 @@ private static NodeCandidate node(String hostname, .ipConfig(IP.Config.of(List.of("::1"), List.of("::2"))) .build(); return new NodeCandidate.ConcreteNodeCandidate(node, false, totalHostResources.subtract(allocatedHostResources), Optional.of(parent), - false, exclusiveSwitch, false, true, false); + false, exclusiveSwitch, false, false, true, false); } private static NodeCandidate node(String hostname, NodeResources nodeResources,