Skip to content

Commit

Permalink
Merge pull request #32637 from vespa-engine/mpolden/snapshot-history
Browse files Browse the repository at this point in the history
Record time of each snapshot event
  • Loading branch information
mpolden authored Oct 22, 2024
2 parents 93a5e84 + c5ea6e3 commit bbc6c94
Show file tree
Hide file tree
Showing 10 changed files with 185 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -1,36 +1,86 @@
package com.yahoo.vespa.hosted.provision.backup;

import com.google.common.collect.ImmutableMap;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostName;
import com.yahoo.vespa.hosted.provision.node.ClusterId;

import java.time.Instant;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;

/**
* A backup snapshot of a node's local data. Only {@link ClusterSpec.Type#content} nodes support snapshots.
*
* @author mpolden
*/
public record Snapshot(SnapshotId id, HostName hostname, State state, Instant createdAt, ClusterId cluster, int clusterIndex) {
public record Snapshot(SnapshotId id, HostName hostname, State state, History history, ClusterId cluster, int clusterIndex) {

/**
 * Validates the components of this record.
 *
 * @throws NullPointerException if any of the reference components is null
 * @throws IllegalArgumentException if clusterIndex is negative
 */
public Snapshot {
    Objects.requireNonNull(id);
    Objects.requireNonNull(state);
    Objects.requireNonNull(hostname);
    Objects.requireNonNull(createdAt);
    Objects.requireNonNull(history);
    Objects.requireNonNull(cluster);
    if (clusterIndex < 0) {
        // Report the offending index itself, not the cluster it belongs to
        throw new IllegalArgumentException("clusterIndex cannot be negative, got " + clusterIndex);
    }
}

public Snapshot with(State state) {
if (state.compareTo(this.state) < 0) {
public Snapshot with(State state, Instant at) {
if (!canChangeTo(state)) {
throw new IllegalArgumentException("Cannot change state of " + this + " to " + state);
}
return new Snapshot(id, hostname, state, createdAt, cluster, clusterIndex);
return new Snapshot(id, hostname, state, history.with(state, at), cluster, clusterIndex);
}

/** Returns whether this snapshot may change from its current state to the given state */
private boolean canChangeTo(State state) {
    // A restored snapshot may be restored again
    boolean repeatedRestore = this.state == State.restored && state == State.restoring;
    if (repeatedRestore) {
        return true;
    }
    // Enum constants compare by declaration order, so the ordinal difference tells both the direction
    // and the size of the change: stay in place or advance exactly one step
    int steps = state.ordinal() - this.state.ordinal();
    return steps >= 0 && steps <= 1;
}

/** The most recent time each state was recorded, keyed by state */
public record History(ImmutableMap<State, Event> events) {

    public History {
        Objects.requireNonNull(events);
    }

    /** Returns the event recorded for the given state, or empty if there is none */
    public Optional<Event> event(State type) {
        Event recorded = events.get(type);
        return recorded == null ? Optional.empty() : Optional.of(recorded);
    }

    /** Returns a copy of this history where the event of the given type is set to the given instant */
    public History with(State type, Instant at) {
        ImmutableMap.Builder<State, Event> updated = builderWithout(type);
        updated.put(type, new Event(type, at));
        return new History(updated.build());
    }

    /** Returns a builder holding every current event except the one keyed by the given type */
    private ImmutableMap.Builder<State, Event> builderWithout(State type) {
        ImmutableMap.Builder<State, Event> remaining = ImmutableMap.builder();
        for (var entry : events.entrySet()) {
            if (entry.getKey() == type) {
                continue; // Left out so that the caller can add a replacement
            }
            remaining.put(entry.getKey(), entry.getValue());
        }
        return remaining;
    }

    /** Returns a history containing a single event of the given type at the given instant */
    public static History of(State type, Instant at) {
        Event first = new Event(type, at);
        return new History(ImmutableMap.of(type, first));
    }

    /** A state and the instant it was recorded */
    public record Event(State type, Instant at) {

        public Event {
            Objects.requireNonNull(type);
            Objects.requireNonNull(at);
        }

    }

}

public enum State {
Expand Down Expand Up @@ -58,8 +108,8 @@ public static SnapshotId generateId() {
return new SnapshotId(UUID.randomUUID());
}

public static Snapshot create(SnapshotId id, HostName hostname, ClusterId cluster, int clusterIndex, Instant at) {
return new Snapshot(id, hostname, State.creating, at, cluster, clusterIndex);
public static Snapshot create(SnapshotId id, HostName hostname, Instant at, ClusterId cluster, int clusterIndex) {
return new Snapshot(id, hostname, State.creating, History.of(State.creating, at), cluster, clusterIndex);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public Snapshot create(String hostname, Instant instant) {
" is busy with snapshot " + node.status().snapshot().get().id());
}
ClusterId cluster = new ClusterId(node.allocation().get().owner(), node.allocation().get().membership().cluster().id());
return Optional.of(Snapshot.create(id, com.yahoo.config.provision.HostName.of(hostname), cluster, node.allocation().get().membership().index(), instant));
return Optional.of(Snapshot.create(id, com.yahoo.config.provision.HostName.of(hostname), instant, cluster, node.allocation().get().membership().index()));
}, lock).get();
}
}
Expand All @@ -73,13 +73,14 @@ public Snapshot create(String hostname, Instant instant) {
public Snapshot restore(SnapshotId id, String hostname) {
try (var lock = db.lockSnapshots(hostname)) {
Snapshot snapshot = require(id, hostname);
Instant now = nodeRepository.clock().instant();
return write(id, hostname, (node) -> {
if (busy(node)) {
throw new IllegalArgumentException("Cannot restore snapshot: Node " + hostname +
" is busy with snapshot " + node.status().snapshot().get().id() + " in "+
node.status().snapshot().get().state() + " state");
}
return Optional.of(snapshot.with(Snapshot.State.restoring));
return Optional.of(snapshot.with(Snapshot.State.restoring, now));
}, lock).get();
}
}
Expand All @@ -101,12 +102,13 @@ public void remove(SnapshotId id, String hostname, boolean force) {
public Snapshot move(SnapshotId id, String hostname, Snapshot.State newState) {
try (var lock = db.lockSnapshots(hostname)) {
Snapshot current = require(id, hostname);
Instant now = nodeRepository.clock().instant();
return write(id, hostname, node -> {
if (!busyWith(id, node)) {
throw new IllegalArgumentException("Cannot move snapshot " + id + " to " + newState +
": Node " + hostname + " is not working on this snapshot");
}
return Optional.of(current.with(newState));
return Optional.of(current.with(newState, now));
}, lock).get();
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.persistence;

import com.google.common.collect.ImmutableMap;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostName;
import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Cursor;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.Slime;
Expand All @@ -24,21 +26,30 @@ public class SnapshotSerializer {
private static final String ID_FIELD = "id";
private static final String HOSTNAME_FIELD = "hostname";
private static final String STATE_FIELD = "state";
private static final String CREATED_AT_FIELD = "createdAt";
private static final String INSTANCE_FIELD = "instance";
private static final String CLUSTER_FIELD = "cluster";
private static final String CLUSTER_INDEX_FIELD = "clusterIndex";
private static final String HISTORY_FIELD = "history";
private static final String EVENT_FIELD = "event";
private static final String AT_FIELD = "at";

private SnapshotSerializer() {}

public static Snapshot fromInspector(Inspector object) {
ImmutableMap.Builder<Snapshot.State, Snapshot.History.Event> history = ImmutableMap.builder();
object.field(HISTORY_FIELD).traverse((ArrayTraverser) (idx, inspector) -> {
Snapshot.State type = stateFromSlime(inspector.field(EVENT_FIELD).asString());
Instant at = Instant.ofEpochMilli(inspector.field(AT_FIELD).asLong());
history.put(type, new Snapshot.History.Event(type, at));
});
return new Snapshot(SnapshotId.of(object.field(ID_FIELD).asString()),
HostName.of(object.field(HOSTNAME_FIELD).asString()),
stateFromSlime(object.field(STATE_FIELD).asString()),
Instant.ofEpochMilli(object.field(CREATED_AT_FIELD).asLong()),
new Snapshot.History(history.build()),
new ClusterId(ApplicationId.fromSerializedForm(object.field(INSTANCE_FIELD).asString()),
ClusterSpec.Id.from(object.field(CLUSTER_FIELD).asString())),
(int) object.field(CLUSTER_INDEX_FIELD).asLong());
(int) object.field(CLUSTER_INDEX_FIELD).asLong()
);
}

public static Snapshot fromSlime(Slime slime) {
Expand Down Expand Up @@ -72,10 +83,15 @@ public static void toSlime(Snapshot snapshot, Cursor object) {
object.setString(ID_FIELD, snapshot.id().toString());
object.setString(HOSTNAME_FIELD, snapshot.hostname().value());
object.setString(STATE_FIELD, asString(snapshot.state()));
object.setLong(CREATED_AT_FIELD, snapshot.createdAt().toEpochMilli());
object.setString(INSTANCE_FIELD, snapshot.cluster().application().serializedForm());
object.setString(CLUSTER_FIELD, snapshot.cluster().cluster().value());
object.setLong(CLUSTER_INDEX_FIELD, snapshot.clusterIndex());
Cursor historyArray = object.setArray(HISTORY_FIELD);
snapshot.history().events().values().forEach(event -> {
Cursor eventObject = historyArray.addObject();
eventObject.setString(EVENT_FIELD, asString(event.type()));
eventObject.setLong(AT_FIELD, event.at().toEpochMilli());
});
}

public static String asString(Snapshot.State state) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.yahoo.vespa.hosted.provision.backup.SnapshotId;
import com.yahoo.vespa.hosted.provision.persistence.SnapshotSerializer;

import java.time.Instant;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -46,7 +47,9 @@ private SnapshotResponse(NodeRepository nodeRepository, Optional<String> hostnam
Cursor snapshotsArray = root.setArray("snapshots");
snapshots.stream()
.sorted(Comparator.comparing(Snapshot::hostname)
.thenComparing(Snapshot::createdAt))
.thenComparing(snapshot -> snapshot.history().event(Snapshot.State.creating)
.map(Snapshot.History.Event::at)
.orElse(Instant.EPOCH)))
.forEach(snapshot -> SnapshotSerializer.toSlime(snapshot, snapshotsArray.addObject()));
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package com.yahoo.vespa.hosted.provision.backup;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostName;
import com.yahoo.vespa.hosted.provision.node.ClusterId;
import org.junit.jupiter.api.Test;

import java.time.Instant;

import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.fail;


/**
 * Tests which state changes {@link Snapshot#with(Snapshot.State, Instant)} accepts and rejects.
 *
 * @author mpolden
 */
class SnapshotTest {

    @Test
    void state_changes() {
        // Forward, one step at a time, plus repeated restore
        assertAllowed(Snapshot.State.creating, Snapshot.State.created);
        assertAllowed(Snapshot.State.created, Snapshot.State.restoring);
        assertAllowed(Snapshot.State.restoring, Snapshot.State.restored);
        assertAllowed(Snapshot.State.restored, Snapshot.State.restoring);

        // Backwards or skipping a state
        assertDisallowed(Snapshot.State.created, Snapshot.State.creating);
        assertDisallowed(Snapshot.State.creating, Snapshot.State.restoring);
        assertDisallowed(Snapshot.State.creating, Snapshot.State.restored);
        assertDisallowed(Snapshot.State.restored, Snapshot.State.created);
        // Previously a duplicate of the line above; cover the remaining backwards change from restored
        assertDisallowed(Snapshot.State.restored, Snapshot.State.creating);
    }

    /** Asserts that a snapshot in state {@code from} can change to state {@code to} */
    private static void assertAllowed(Snapshot.State from, Snapshot.State to) {
        Snapshot snapshot = snapshot(from);
        assertSame(to, snapshot.with(to, Instant.ofEpochMilli(123)).state());
    }

    /** Asserts that changing a snapshot from state {@code from} to state {@code to} is rejected */
    private static void assertDisallowed(Snapshot.State from, Snapshot.State to) {
        Snapshot snapshot = snapshot(from);
        try {
            snapshot.with(to, Instant.ofEpochMilli(123));
            fail("Changing state " + from + " -> " + to + " should fail");
        } catch (IllegalArgumentException ignored) {} // Expected: the state change was rejected
    }

    /** Returns a snapshot in the given state, with a history containing only that state */
    private static Snapshot snapshot(Snapshot.State state) {
        return new Snapshot(Snapshot.generateId(), HostName.of("h1.example.com"), state,
                            Snapshot.History.of(state, Instant.ofEpochMilli(123)), new ClusterId(ApplicationId.defaultId(), ClusterSpec.Id.from("c1")),
                            0);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,20 @@ void serialization() {
Snapshot snapshot0 = new Snapshot(SnapshotId.of("ccf0b6de-3e06-4045-acba-458d99ef73e5"),
HostName.of("host0.example.com"),
Snapshot.State.creating,
Instant.ofEpochMilli(123),
Snapshot.History.of(Snapshot.State.creating, Instant.ofEpochMilli(123)),
new ClusterId(ApplicationId.from("t1", "a1", "i1"),
ClusterSpec.Id.from("c1")),
1);
0
);
Snapshot snapshot1 = new Snapshot(SnapshotId.of("7e45b44a-0f1a-4729-a4f4-20fff5d1e85d"),
HostName.of("host1.example.com"),
Snapshot.State.restored,
Instant.ofEpochMilli(456),
Snapshot.History.of(Snapshot.State.restoring, Instant.ofEpochMilli(123))
.with(Snapshot.State.restored, Instant.ofEpochMilli(456)),
new ClusterId(ApplicationId.from("t2", "a2", "i2"),
ClusterSpec.Id.from("c2")),
2);
2
);
assertEquals(snapshot0, SnapshotSerializer.fromSlime(SnapshotSerializer.toSlime(snapshot0)));
List<Snapshot> snapshots = List.of(snapshot0, snapshot1);
assertEquals(snapshots, SnapshotSerializer.listFromSlime(SnapshotSerializer.toSlime(snapshots)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,20 @@
{
"cluster": "id3",
"clusterIndex": 0,
"createdAt": 123,
"hostname": "host4.yahoo.com",
"id": "(ignore)",
"instance": "tenant3:application3:instance3",
"state": "created"
"state": "created",
"history": [
{
"at": 123,
"event": "creating"
},
{
"at": 123,
"event": "created"
}
]
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
{
"cluster": "id3",
"clusterIndex": 0,
"createdAt": 123,
"hostname": "host4.yahoo.com",
"id": "(ignore)",
"instance": "tenant3:application3:instance3",
"state": "creating"
"state": "creating",
"history": [
{
"at": 123,
"event": "creating"
}
]
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,30 @@
{
"cluster": "id2",
"clusterIndex": 0,
"createdAt": 123,
"hostname": "host2.yahoo.com",
"id": "(ignore)",
"instance": "tenant2:application2:instance2",
"state": "creating"
"state": "creating",
"history": [
{
"at": 123,
"event": "creating"
}
]
},
{
"cluster": "id3",
"clusterIndex": 0,
"createdAt": 123,
"hostname": "host4.yahoo.com",
"id": "(ignore)",
"instance": "tenant3:application3:instance3",
"state": "creating"
"state": "creating",
"history": [
{
"at": 123,
"event": "creating"
}
]
}
]
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
{
"cluster": "id3",
"clusterIndex": 0,
"createdAt": 123,
"hostname": "host4.yahoo.com",
"id": "(ignore)",
"instance": "tenant3:application3:instance3",
"state": "creating"
"state": "creating",
"history": [
{
"at": 123,
"event": "creating"
}
]
}

0 comments on commit bbc6c94

Please sign in to comment.