Skip to content

Commit

Permalink
Merge pull request #29546 from vespa-engine/vekterli/use-fake-zk-data…
Browse files Browse the repository at this point in the history
…base-for-subset-of-cc-tests

Use fake ZooKeeper database implementation for subset of CC tests
  • Loading branch information
vekterli authored Dec 4, 2023
2 parents dd6b768 + e7a2360 commit 70d6cc8
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.listeners.NodeListener;
import com.yahoo.vespa.clustercontroller.core.listeners.SlobrokListener;
Expand Down Expand Up @@ -152,7 +151,7 @@ public static FleetController create(FleetControllerOptions options, MetricRepor
options.nodeStateRequestTimeoutEarliestPercentage(),
options.nodeStateRequestTimeoutLatestPercentage(),
options.nodeStateRequestRoundTripTimeMaxSeconds());
var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer);
var database = new DatabaseHandler(context, options.dbFactoryFn().apply(context), timer, options.zooKeeperServerAddress(), timer);
var lookUp = new SlobrokClient(context, timer, options.slobrokConnectionSpecs());
var stateGenerator = new StateChangeHandler(context, timer, log);
var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseFactory;
import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory;

import java.time.Duration;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -13,6 +16,7 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Function;

/**
* Immutable class representing all the options that can be set in the fleetcontroller.
Expand Down Expand Up @@ -128,6 +132,9 @@ public class FleetControllerOptions {

private final int maxNumberOfGroupsAllowedToBeDown;

private final Function<FleetControllerContext, DatabaseFactory> dbFactoryFn;

// TODO less impressive length...!
private FleetControllerOptions(String clusterName,
int fleetControllerIndex,
int fleetControllerCount,
Expand Down Expand Up @@ -168,7 +175,8 @@ private FleetControllerOptions(String clusterName,
boolean clusterFeedBlockEnabled,
Map<String, Double> clusterFeedBlockLimit,
double clusterFeedBlockNoiseLevel,
int maxNumberOfGroupsAllowedToBeDown) {
int maxNumberOfGroupsAllowedToBeDown,
Function<FleetControllerContext, DatabaseFactory> dbFactoryFn) {
this.clusterName = clusterName;
this.fleetControllerIndex = fleetControllerIndex;
this.fleetControllerCount = fleetControllerCount;
Expand Down Expand Up @@ -210,6 +218,7 @@ private FleetControllerOptions(String clusterName,
this.clusterFeedBlockLimit = clusterFeedBlockLimit;
this.clusterFeedBlockNoiseLevel = clusterFeedBlockNoiseLevel;
this.maxNumberOfGroupsAllowedToBeDown = maxNumberOfGroupsAllowedToBeDown;
this.dbFactoryFn = dbFactoryFn;
}

public Duration getMaxDeferredTaskVersionWaitTime() {
Expand Down Expand Up @@ -382,6 +391,8 @@ public double clusterFeedBlockNoiseLevel() {

/** Returns the value of the {@code maxNumberOfGroupsAllowedToBeDown} option held by this options object. */
public int maxNumberOfGroupsAllowedToBeDown() { return maxNumberOfGroupsAllowedToBeDown; }

/**
 * Returns the function that maps a {@link FleetControllerContext} to the {@link DatabaseFactory}
 * to use; callers apply it to their context to obtain the factory.
 */
public Function<FleetControllerContext, DatabaseFactory> dbFactoryFn() { return dbFactoryFn; }

public static class Builder {

private String clusterName;
Expand Down Expand Up @@ -425,6 +436,7 @@ public static class Builder {
private Map<String, Double> clusterFeedBlockLimit = Collections.emptyMap();
private double clusterFeedBlockNoiseLevel = 0.01;
private int maxNumberOfGroupsAllowedToBeDown = 1;
private Function<FleetControllerContext, DatabaseFactory> dbFactoryFn = ZooKeeperDatabaseFactory::new;

public Builder(String clusterName, Collection<ConfiguredNode> nodes) {
this.clusterName = clusterName;
Expand Down Expand Up @@ -677,6 +689,11 @@ public Builder setMaxNumberOfGroupsAllowedToBeDown(int maxNumberOfGroupsAllowedT
return this;
}

/**
 * Sets the function used to produce a {@link DatabaseFactory} from a controller context.
 * When this setter is never called, the builder's initial value {@code ZooKeeperDatabaseFactory::new} applies.
 *
 * @param fn factory function to store; it is kept as-is and no null check is performed here
 * @return this builder, to allow call chaining
 */
public Builder setDbFactoryFn(Function<FleetControllerContext, DatabaseFactory> fn) {
this.dbFactoryFn = fn;
return this;
}

public FleetControllerOptions build() {
return new FleetControllerOptions(clusterName,
index,
Expand Down Expand Up @@ -718,7 +735,8 @@ public FleetControllerOptions build() {
clusterFeedBlockEnabled,
clusterFeedBlockLimit,
clusterFeedBlockNoiseLevel,
maxNumberOfGroupsAllowedToBeDown);
maxNumberOfGroupsAllowedToBeDown,
dbFactoryFn);
}

public static Builder copy(FleetControllerOptions options) {
Expand Down Expand Up @@ -764,6 +782,7 @@ public static Builder copy(FleetControllerOptions options) {
builder.clusterFeedBlockLimit = Map.copyOf(options.clusterFeedBlockLimit);
builder.clusterFeedBlockNoiseLevel = options.clusterFeedBlockNoiseLevel;
builder.maxNumberOfGroupsAllowedToBeDown = options.maxNumberOfGroupsAllowedToBeDown;
builder.dbFactoryFn = options.dbFactoryFn;

return builder;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
public interface DatabaseFactory {

class Params {
String dbAddress;
int dbSessionTimeout;
Database.DatabaseListener listener;
public String dbAddress;
public int dbSessionTimeout;
public Database.DatabaseListener listener;

Params databaseAddress(String address) { this.dbAddress = address; return this; }
Params databaseSessionTimeout(int timeout) { this.dbSessionTimeout = timeout; return this; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

@ExtendWith(CleanupZookeeperLogsOnSuccess.class)
public class DistributionBitCountTest extends FleetControllerTest {

// Opt out of the real-ZooKeeper default of the base test class so that
// setUpFleetController installs the in-memory fake database factory instead.
DistributionBitCountTest() {
useRealZooKeeperInTest(false);
}

private FleetControllerOptions setUpSystem() throws Exception {
List<ConfiguredNode> configuredNodes = new ArrayList<>();
for (int i = 0 ; i < 10; i++) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;

import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vespa.clustercontroller.core.database.Database;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseFactory;

import java.util.Map;
import java.util.TreeMap;

/**
* Memory-backed fake DB implementation that tries to mirror the semantics of the
* (synchronous) ZooKeeper DB implementation. By itself this fake acts as if a quorum
* with a _single_, local ZK instance has been configured. This DB instance cannot be
* used across multiple cluster controller instances.
*
* Threading note: we expect all invocations on this instance to happen from the
* main cluster controller thread (i.e. "as-if" single threaded), but we wrap everything
* in a mutex to stay on the safe side since this isn't explicitly documented as
* part of the API.
*/
public class FakeZooKeeperDatabase extends Database {

public static class Factory implements DatabaseFactory {
private final FleetControllerContext context;
public Factory(FleetControllerContext context) {
this.context = context;
}
@Override
public Database create(Params params) {
return new FakeZooKeeperDatabase(context, params.listener);
}
}

private final FleetControllerContext context;
private final Database.DatabaseListener listener;

private final Object mutex = new Object();
private boolean closed = false;
private Integer persistedLatestStateVersion = null;
private Map<Integer, Integer> persistedLeaderVotes = new TreeMap<>();
private Map<Node, NodeState> persistedWantedStates = new TreeMap<>();
private Map<Node, Long> persistedStartTimestamps = new TreeMap<>();
private ClusterStateBundle persistedBundle = ClusterStateBundle.ofBaselineOnly(AnnotatedClusterState.emptyState());

public FakeZooKeeperDatabase(FleetControllerContext context, DatabaseListener listener) {
this.context = context;
this.listener = listener;
}

@Override
public void close() {
synchronized (mutex) {
closed = true;
}
}

@Override
public boolean isClosed() {
synchronized (mutex) {
return closed;
}
}

@Override
public boolean storeMasterVote(int voteForNode) {
Map<Integer, Integer> voteState;
synchronized (mutex) {
persistedLeaderVotes.put(context.id().index(), voteForNode);
voteState = Map.copyOf(persistedLeaderVotes);
}
listener.handleMasterData(voteState);
return true;
}

@Override
public boolean storeLatestSystemStateVersion(int version) {
synchronized (mutex) {
persistedLatestStateVersion = version;
return true;
}
}

@Override
public Integer retrieveLatestSystemStateVersion() {
synchronized (mutex) {
return persistedLatestStateVersion;
}
}

@Override
public boolean storeWantedStates(Map<Node, NodeState> states) {
synchronized (mutex) {
persistedWantedStates = Map.copyOf(states);
}
return true;
}

@Override
public Map<Node, NodeState> retrieveWantedStates() {
synchronized (mutex) {
return Map.copyOf(persistedWantedStates);
}
}

@Override
public boolean storeStartTimestamps(Map<Node, Long> timestamps) {
synchronized (mutex) {
persistedStartTimestamps = Map.copyOf(timestamps);
return true;
}
}

@Override
public Map<Node, Long> retrieveStartTimestamps() {
synchronized (mutex) {
return Map.copyOf(persistedStartTimestamps);
}
}

@Override
public boolean storeLastPublishedStateBundle(ClusterStateBundle stateBundle) {
synchronized (mutex) {
persistedBundle = stateBundle;
return true;
}
}

@Override
public ClusterStateBundle retrieveLastPublishedStateBundle() {
synchronized (mutex) {
return persistedBundle;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler;
import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory;
import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator;
import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer;
import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient;
Expand Down Expand Up @@ -49,13 +48,14 @@
*/
public abstract class FleetControllerTest implements Waiter {

private static final Logger log = Logger.getLogger(FleetControllerTest.class.getName());
protected static final Logger log = Logger.getLogger(FleetControllerTest.class.getName());
private static final int DEFAULT_NODE_COUNT = 10;

private final Duration timeout = Duration.ofSeconds(30);
protected Slobrok slobrok;

protected FleetControllerOptions options;
private boolean useRealZooKeeperInTest = true;
ZooKeeperTestServer zooKeeperServer;
protected final List<FleetController> fleetControllers = new ArrayList<>();
protected List<DummyVdsNode> nodes = new ArrayList<>();
Expand All @@ -73,6 +73,10 @@ public abstract class FleetControllerTest implements Waiter {
LogSetup.initVespaLogging("fleetcontroller");
}

/**
 * Chooses the database backend used by {@code setUpFleetController}: when the flag is
 * {@code true} (the field's initial value) {@code setUpZooKeeperServer} is called on the builder,
 * otherwise {@code FakeZooKeeperDatabase.Factory} is installed as the database factory.
 *
 * @param useRealZk {@code true} to keep using a real ZooKeeper setup, {@code false} to use the fake
 */
protected void useRealZooKeeperInTest(boolean useRealZk) {
this.useRealZooKeeperInTest = useRealZk;
}

protected static FleetControllerOptions.Builder defaultOptions() {
return defaultOptions(IntStream.range(0, DEFAULT_NODE_COUNT)
.mapToObj(i -> new ConfiguredNode(i, false))
Expand Down Expand Up @@ -121,7 +125,7 @@ FleetController createFleetController(Timer timer,
var log = new EventLog(timer, metricUpdater);
var cluster = new ContentCluster(options.clusterName(), options.nodes(), options.storageDistribution());
var stateGatherer = new NodeStateGatherer(timer, timer, log);
var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer);
var database = new DatabaseHandler(context, options.dbFactoryFn().apply(context), timer, options.zooKeeperServerAddress(), timer);
// Setting this <1000 ms causes ECONNREFUSED on socket trying to connect to ZK server, in ZooKeeper,
// after creating a new ZooKeeper (session). This causes ~10s extra time to connect after connection loss.
// Reasons unknown. Larger values like the default 10_000 cause that much additional running time for some tests.
Expand All @@ -139,7 +143,13 @@ FleetController createFleetController(Timer timer,
}

protected FleetControllerOptions setUpFleetController(Timer timer, FleetControllerOptions.Builder builder) throws Exception {
setUpZooKeeperServer(builder);
// TODO consolidate CC setup in tests; currently partial duplication of
// setup/init code across test subclasses.
if (useRealZooKeeperInTest) {
setUpZooKeeperServer(builder);
} else {
builder.setDbFactoryFn(FakeZooKeeperDatabase.Factory::new);
}
builder.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok));
options = builder.build();
startFleetController(timer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;

import java.time.Duration;
import java.util.ArrayList;
Expand All @@ -23,22 +22,25 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

@ExtendWith(CleanupZookeeperLogsOnSuccess.class)
public class StateChangeTest extends FleetControllerTest {

private final FakeTimer timer = new FakeTimer();

private FleetController ctrl;
private DummyCommunicator communicator;

// Turn off the base test class' real-ZooKeeper default; these tests run against
// the in-memory fake database instead.
StateChangeTest() {
useRealZooKeeperInTest(false);
}

private void initialize(FleetControllerOptions.Builder builder) throws Exception {
List<Node> nodes = new ArrayList<>();
for (int i = 0; i < builder.nodes().size(); ++i) {
nodes.add(new Node(NodeType.STORAGE, i));
nodes.add(new Node(NodeType.DISTRIBUTOR, i));
}

setUpZooKeeperServer(builder);
builder.setDbFactoryFn(FakeZooKeeperDatabase.Factory::new);
communicator = new DummyCommunicator(nodes, timer);
boolean start = false;
FleetControllerOptions options = builder.build();
Expand Down

0 comments on commit 70d6cc8

Please sign in to comment.