Skip to content

Commit

Permalink
Merge pull request #20819 from vespa-engine/revert-20801-jonmv/reappl…
Browse files Browse the repository at this point in the history
…y-zk-changes-2

Revert "Jonmv/reapply zk changes 2"
  • Loading branch information
hakonhall authored Jan 14, 2022
2 parents 85ab960 + 810da35 commit 1d84bb2
Show file tree
Hide file tree
Showing 16 changed files with 151 additions and 663 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ public ClusterControllerCluster(AbstractConfigProducer<?> parent, String subId,
public void getConfig(ZookeeperServerConfig.Builder builder) {
builder.clientPort(ZK_CLIENT_PORT);
builder.juteMaxBuffer(1024 * 1024); // 1 Mb should be more than enough for cluster controller
boolean oldQuorumExists = containerCluster.getContainers().stream() // More than half the previous hosts must be present in the new config for quorum to persist.
.filter(container -> previousHosts.contains(container.getHostName())) // Set intersection is symmetric.
.count() > previousHosts.size() / 2;
for (ClusterControllerContainer container : containerCluster.getContainers()) {
ZookeeperServerConfig.Server.Builder serverBuilder = new ZookeeperServerConfig.Server.Builder();
serverBuilder.hostname(container.getHostName());
serverBuilder.id(container.index());
serverBuilder.joining( ! previousHosts.isEmpty() && ! previousHosts.contains(container.getHostName()));
serverBuilder.retired(container.isRetired());
serverBuilder.joining(oldQuorumExists && ! previousHosts.contains(container.getHostName()));
builder.server(serverBuilder);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,8 @@ public void getConfig(ZookeeperServerConfig.Builder builder) {
ZookeeperServerConfig.Server.Builder serverBuilder = new ZookeeperServerConfig.Server.Builder();
serverBuilder.hostname(container.getHostName())
.id(container.index())
.joining( ! previousHosts.isEmpty() &&
! previousHosts.contains(container.getHostName()))
.retired(container.isRetired());
.joining(!previousHosts.isEmpty() &&
!previousHosts.contains(container.getHostName()));
builder.server(serverBuilder);
builder.dynamicReconfiguration(true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2054,7 +2054,7 @@ public void containerWithZooKeeperJoiningServers() {
assertTrue("Initial servers are not joining", config.build().server().stream().noneMatch(ZookeeperServerConfig.Server::joining));
}
{
VespaModel nextModel = tester.createModel(Zone.defaultZone(), servicesXml.apply(3), true, false, false, 0, Optional.of(model), new DeployState.Builder(), "node-1-3-10-04", "node-1-3-10-03");
VespaModel nextModel = tester.createModel(Zone.defaultZone(), servicesXml.apply(5), true, false, false, 0, Optional.of(model), new DeployState.Builder());
ApplicationContainerCluster cluster = nextModel.getContainerClusters().get("zk");
ZookeeperServerConfig.Builder config = new ZookeeperServerConfig.Builder();
cluster.getContainers().forEach(c -> c.getConfig(config));
Expand All @@ -2067,14 +2067,6 @@ public void containerWithZooKeeperJoiningServers() {
4, true),
config.build().server().stream().collect(Collectors.toMap(ZookeeperServerConfig.Server::id,
ZookeeperServerConfig.Server::joining)));
assertEquals("Retired nodes are retired",
Map.of(0, false,
1, true,
2, true,
3, false,
4, false),
config.build().server().stream().collect(Collectors.toMap(ZookeeperServerConfig.Server::id,
ZookeeperServerConfig.Server::retired)));
}
}

Expand Down
3 changes: 0 additions & 3 deletions configdefinitions/src/vespa/zookeeper-server.def
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,10 @@ juteMaxBuffer int default=52428800
myid int restart
server[].id int
server[].hostname string
server[].clientPort int default=2181
server[].quorumPort int default=2182
server[].electionPort int default=2183
# Whether this server is joining an existing cluster
server[].joining bool default=false
# Whether this server is retired, and about to be removed
server[].retired bool default=false

# Needed when upgrading from ZooKeeper 3.4 to 3.5, see https://issues.apache.org/jira/browse/ZOOKEEPER-3056,
# and in general where there is a zookeeper ensemble running that has had few transactions.
Expand Down
9 changes: 2 additions & 7 deletions zookeeper-server/zookeeper-server-3.6.3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
<artifactId>zookeeper-server-3.6.3</artifactId>
<packaging>container-plugin</packaging>
<version>7-SNAPSHOT</version>
<properties>
<zookeeper.version>3.6.3</zookeeper.version>
</properties>
<dependencies>
<dependency>
<groupId>com.yahoo.vespa</groupId>
Expand All @@ -35,7 +32,7 @@
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
<version>3.6.3</version>
<exclusions>
<!--
Container provides wiring for all common log libraries
Expand Down Expand Up @@ -90,6 +87,7 @@
<configuration>
<compilerArgs>
<arg>-Xlint:all</arg>
<arg>-Werror</arg>
</compilerArgs>
</configuration>
</plugin>
Expand All @@ -99,9 +97,6 @@
<configuration>
<redirectTestOutputToFile>${test.hide}</redirectTestOutputToFile>
<forkMode>once</forkMode>
<systemPropertyVariables>
<zk-version>${zookeeper.version}</zk-version>
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@
*/
public class ReconfigurableVespaZooKeeperServer extends AbstractComponent implements VespaZooKeeperServer {

private QuorumPeer peer;
private final AtomicReference<QuorumPeer> peer = new AtomicReference<>();

@Inject
public ReconfigurableVespaZooKeeperServer(Reconfigurer reconfigurer, ZookeeperServerConfig zookeeperServerConfig) {
peer = reconfigurer.startOrReconfigure(zookeeperServerConfig, this, () -> peer = new VespaQuorumPeer());
reconfigurer.startOrReconfigure(zookeeperServerConfig, this, VespaQuorumPeer::new, peer::set);
}

@Override
public void shutdown() {
peer.shutdown(Duration.ofMinutes(1));
peer.get().shutdown(Duration.ofMinutes(1));
}

@Override
public void start(Path configFilePath) {
peer.start(configFilePath);
peer.get().start(configFilePath);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@
package com.yahoo.vespa.zookeeper;

import com.yahoo.vespa.zookeeper.client.ZkClientConfigBuilder;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.admin.ZooKeeperAdmin;
import org.apache.zookeeper.data.ACL;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand All @@ -23,28 +19,27 @@ public class VespaZooKeeperAdminImpl implements VespaZooKeeperAdmin {
private static final Logger log = java.util.logging.Logger.getLogger(VespaZooKeeperAdminImpl.class.getName());

@Override
public void reconfigure(String connectionSpec, String servers) throws ReconfigException {
try (ZooKeeperAdmin zooKeeperAdmin = createAdmin(connectionSpec)) {
public void reconfigure(String connectionSpec, String joiningServers, String leavingServers) throws ReconfigException {
ZooKeeperAdmin zooKeeperAdmin = null;
try {
zooKeeperAdmin = createAdmin(connectionSpec);
long fromConfig = -1;
// Using string parameters because the List variant of reconfigure fails to join empty lists (observed on 3.5.6, fixed in 3.7.0)
log.log(Level.INFO, "Applying ZooKeeper config: " + servers);
byte[] appliedConfig = zooKeeperAdmin.reconfigure(null, null, servers, fromConfig, null);
byte[] appliedConfig = zooKeeperAdmin.reconfigure(joiningServers, leavingServers, null, fromConfig, null);
log.log(Level.INFO, "Applied ZooKeeper config: " + new String(appliedConfig, StandardCharsets.UTF_8));

// Verify by issuing a write operation; this is only accepted once new quorum is obtained.
List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE;
String node = zooKeeperAdmin.create("/reconfigure-dummy-node", new byte[0], acl, CreateMode.EPHEMERAL_SEQUENTIAL);
zooKeeperAdmin.delete(node, -1);

log.log(Level.INFO, "Verified ZooKeeper config: " + new String(appliedConfig, StandardCharsets.UTF_8));
}
catch ( KeeperException.ReconfigInProgress
| KeeperException.ConnectionLossException
| KeeperException.NewConfigNoQuorum e) {
throw new ReconfigException(e);
}
catch (KeeperException | IOException | InterruptedException e) {
} catch (KeeperException e) {
if (retryOn(e))
throw new ReconfigException(e);
else
throw new RuntimeException(e);
} catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
} finally {
if (zooKeeperAdmin != null) {
try {
zooKeeperAdmin.close();
} catch (InterruptedException e) { /* ignore */}
}
}
}

Expand All @@ -53,5 +48,11 @@ private ZooKeeperAdmin createAdmin(String connectionSpec) throws IOException {
(event) -> log.log(Level.INFO, event.toString()), new ZkClientConfigBuilder().toConfig());
}

private static boolean retryOn(KeeperException e) {
return e instanceof KeeperException.ReconfigInProgress ||
e instanceof KeeperException.ConnectionLossException ||
e instanceof KeeperException.NewConfigNoQuorum;
}

}

Loading

0 comments on commit 1d84bb2

Please sign in to comment.