Skip to content

Commit

Permalink
Merge branch 'master' into interns/magnus/servicesxml
Browse files Browse the repository at this point in the history
  • Loading branch information
Mangern committed Oct 25, 2024
2 parents 069936c + 217825f commit db84d85
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ public NodeRepository(NodeFlavors flavors,
this.clock = clock;
this.zone = zone;
this.applications = new Applications(db);
this.nodes = new Nodes(db, zone, clock, orchestrator, applications);
this.snapshots = new Snapshots(this);
this.nodes = new Nodes(db, zone, clock, orchestrator, applications, snapshots, flagSource);
this.flavors = flavors;
this.resourcesCalculator = provisionServiceProvider.getHostResourcesCalculator();
this.nodeResourceLimits = new NodeResourceLimits(this);
Expand All @@ -151,7 +152,6 @@ public NodeRepository(NodeFlavors flavors,
this.orchestrator = orchestrator;
this.spareCount = spareCount;
this.healthChecker = provisionServiceProvider.getHealthChecker();
this.snapshots = new Snapshots(this);
nodes.rewrite();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,29 @@

import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationMutex;
import com.yahoo.config.provision.ApplicationTransaction;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.ApplicationMutex;
import com.yahoo.config.provision.Zone;
import com.yahoo.time.TimeBudget;
import com.yahoo.transaction.Mutex;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.InfrastructureApplication;
import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.NoSuchNodeException;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.Node.State;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.backup.Snapshots;
import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer;
import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDb;
Expand Down Expand Up @@ -74,13 +79,17 @@ public class Nodes {
private final Clock clock;
private final Orchestrator orchestrator;
private final Applications applications;
private final Snapshots snapshots;
private final BooleanFlag snapshotsEnabled;

public Nodes(CuratorDb db, Zone zone, Clock clock, Orchestrator orchestrator, Applications applications) {
/**
 * Creates a Nodes instance from the given collaborators. All arguments are stored as-is,
 * except the flag source, which is only used to bind the SNAPSHOTS_ENABLED flag.
 */
public Nodes(CuratorDb db, Zone zone, Clock clock, Orchestrator orchestrator, Applications applications, Snapshots snapshots, FlagSource flagSource) {
this.zone = zone;
this.clock = clock;
this.db = db;
this.orchestrator = orchestrator;
this.applications = applications;
this.snapshots = snapshots;
// Bind the flag once here; its current value is read through snapshotsEnabled.value() when needed
this.snapshotsEnabled = Flags.SNAPSHOTS_ENABLED.bindTo(flagSource);
}

/** Read and write all nodes to make sure they are stored in the latest version of the serialized format */
Expand Down Expand Up @@ -652,46 +661,48 @@ public List<Node> retire(Predicate<Node> filter, Agent agent, Instant instant) {
}

/** Retire and deprovision given host and all of its children */
public List<Node> deprovision(String hostname, Agent agent, Instant instant) {
return decommission(hostname, HostOperation.deprovision, agent, instant);
public void deprovision(String hostname, Agent agent, Instant instant) {
decommission(hostname, HostOperation.deprovision, agent, instant);
}

/** Rebuild given host */
public List<Node> rebuild(String hostname, boolean soft, Agent agent, Instant instant) {
return decommission(hostname, soft ? HostOperation.softRebuild : HostOperation.rebuild, agent, instant);
public void rebuild(String hostname, boolean soft, Agent agent, Instant instant) {
decommission(hostname, soft ? HostOperation.softRebuild : HostOperation.rebuild, agent, instant);
}

/** Upgrade flavor for given host */
public List<Node> upgradeFlavor(String hostname, Agent agent, Instant instant, boolean upgrade) {
return decommission(hostname, upgrade ? HostOperation.upgradeFlavor : HostOperation.cancel, agent, instant);
}

private List<Node> decommission(String hostname, HostOperation op, Agent agent, Instant instant) {
Optional<NodeMutex> nodeMutex = lockAndGet(hostname);
if (nodeMutex.isEmpty()) return List.of();
List<Node> result = new ArrayList<>();
boolean wantToDeprovision = op == HostOperation.deprovision;
boolean wantToRebuild = op == HostOperation.rebuild || op == HostOperation.softRebuild;
boolean wantToRetire = op.needsRetirement();
boolean wantToUpgradeFlavor = op == HostOperation.upgradeFlavor;
Node host = nodeMutex.get().node();
try (NodeMutex lock = nodeMutex.get()) {
if ( ! host.type().isHost()) throw new IllegalArgumentException("Cannot " + op + " non-host " + host);
try (Mutex allocationLock = lockUnallocated()) {
// Modify parent with wantToRetire while holding the allocationLock to prevent
// any further allocation of nodes on this host
Node newHost = lock.node().withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild, wantToUpgradeFlavor, agent, instant);
result.add(write(newHost, lock));
public void upgradeFlavor(String hostname, Agent agent, Instant instant, boolean upgrade) {
decommission(hostname, upgrade ? HostOperation.upgradeFlavor : HostOperation.cancel, agent, instant);
}

private void decommission(String hostname, HostOperation op, Agent agent, Instant instant) {
try (var nodeMutexes = lockAndGetRecursively(hostname, Optional.of(Duration.ofSeconds(5)))) {
if (nodeMutexes.parent.isEmpty()) return;

NodeMutex hostMutex = nodeMutexes.parent.get();
if ( ! hostMutex.node().type().isHost()) throw new IllegalArgumentException("Cannot " + op + " non-host " + hostMutex.node());

boolean wantToDeprovision = op == HostOperation.deprovision;
boolean wantToRebuild = op == HostOperation.rebuild || op == HostOperation.softRebuild;
boolean wantToRetire = op.needsRetirement();
boolean wantToUpgradeFlavor = op == HostOperation.upgradeFlavor;
boolean wantToSnapshot = op.needsSnapshot(hostMutex.node(), snapshotsEnabled.value());

// Update host
Node newHost = hostMutex.node().withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild, wantToUpgradeFlavor, agent, instant);
write(newHost, hostMutex);

// Update children
for (var childMutex : nodeMutexes.children()) {
if (wantToRetire || op == HostOperation.cancel) {
Node newNode = childMutex.node().withWantToRetire(wantToRetire, wantToDeprovision, false, false, agent, instant);
write(newNode, childMutex);
}
if (wantToSnapshot) {
snapshots.create(childMutex.node().hostname(), clock.instant());
}
}
}
if (wantToRetire || op == HostOperation.cancel) { // Apply recursively if we're retiring, or cancelling
List<Node> updatedNodes = performOn(list().childrenOf(host), (node, nodeLock) -> {
Node newNode = node.withWantToRetire(wantToRetire, wantToDeprovision, false, false, agent, instant);
return write(newNode, nodeLock);
});
result.addAll(updatedNodes);
}
return result;
}

/**
Expand Down Expand Up @@ -1104,6 +1115,13 @@ public boolean needsRetirement() {
return needsRetirement;
}

/**
 * Returns whether this operation requires a snapshot to be created for all children of given host.
 * This holds only for a soft rebuild of a host with local storage, and only when snapshots are enabled.
 */
public boolean needsSnapshot(Node host, boolean enabled) {
    // Only soft rebuilds can require a snapshot; the host is not inspected for other operations
    if (this != softRebuild) return false;

    boolean hasLocalStorage = host.resources().storageType() == NodeResources.StorageType.local;
    return hasLocalStorage && enabled;
}

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ MatchMaster::match(search::engine::Trace & trace,
* We need a non-const first phase rank lookup since it will be populated
* later on when selecting documents for second phase ranking.
*/
MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize),
MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.diversity_want_hits),
mtf.get_first_phase_rank_lookup(),
[&mtf]() noexcept { mtf.query().set_matching_phase(MatchingPhase::SECOND_PHASE); });
TimedMatchLoopCommunicator timedCommunicator(communicator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ MatchParams::MatchParams(uint32_t numDocs_in,
: 0),
offset(std::min(numDocs_in, offset_in)),
hits(std::min(numDocs_in - offset, hits_in)),
diversity_want_hits(heapSize_in),
first_phase_rank_score_drop_limit(first_phase_rank_score_drop_limit_in),
second_phase_rank_score_drop_limit(second_phase_rank_score_drop_limit_in)
{ }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct MatchParams {
const uint32_t arraySize;
const uint32_t offset;
const uint32_t hits;
const uint32_t diversity_want_hits;
const std::optional<search::feature_t> first_phase_rank_score_drop_limit;
const std::optional<search::feature_t> second_phase_rank_score_drop_limit;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ MatchToolsFactory::createMatchTools() const
}

std::unique_ptr<IDiversifier>
MatchToolsFactory::createDiversifier(uint32_t heapSize) const
MatchToolsFactory::createDiversifier(uint32_t want_hits) const
{
if ( !_diversityParams.enabled() ) {
return {};
Expand All @@ -267,8 +267,8 @@ MatchToolsFactory::createDiversifier(uint32_t heapSize) const
Issue::report("Skipping diversity due to no %s attribute.", _diversityParams.attribute.c_str());
return {};
}
size_t max_per_group = std::max(size_t(1), size_t(heapSize / _diversityParams.min_groups));
return DiversityFilter::create(*attr, heapSize, max_per_group, _diversityParams.min_groups,
size_t max_per_group = std::max(size_t(1), size_t(want_hits / _diversityParams.min_groups));
return DiversityFilter::create(*attr, want_hits, max_per_group, _diversityParams.min_groups,
_diversityParams.cutoff_strategy == DiversityParams::CutoffStrategy::STRICT);
}

Expand Down

0 comments on commit db84d85

Please sign in to comment.