Skip to content

Commit

Permalink
Fix checkpoint and recovery of root lists (#1016)
Browse files Browse the repository at this point in the history
  • Loading branch information
HerbertJordan authored Sep 11, 2024
1 parent 1d1f67c commit b63bb16
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 12 deletions.
12 changes: 10 additions & 2 deletions go/database/mpt/archive_trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,9 @@ func (l *rootList) Prepare(checkpoint checkpoint.Checkpoint) error {
if l.checkpoint+1 != checkpoint {
return fmt.Errorf("checkpoint mismatch, expected %v, got %v", l.checkpoint+1, checkpoint)
}
if err := l.storeRoots(); err != nil {
return err
}
pendingFile := filepath.Join(l.directory, fileNameArchiveRootsPreparedCheckpoint)
return writeRootListCheckpointData(pendingFile, rootListCheckpointData{
Checkpoint: checkpoint,
Expand Down Expand Up @@ -776,16 +779,21 @@ func getRootListRestorer(archiveDir string) rootListRestorer {
}

func (r rootListRestorer) Restore(checkpoint checkpoint.Checkpoint) error {
meta, err := readRootListCheckpointData(filepath.Join(r.directory, fileNameArchiveRootsCommittedCheckpoint))
committedFile := filepath.Join(r.directory, fileNameArchiveRootsCommittedCheckpoint)
meta, err := readRootListCheckpointData(committedFile)
if err != nil {
return err
}

// If the given checkpoint is one step in the future, check whether there is a pending checkpoint.
if meta.Checkpoint+1 == checkpoint {
pending, err := readRootListCheckpointData(filepath.Join(r.directory, fileNameArchiveRootsPreparedCheckpoint))
pendingFile := filepath.Join(r.directory, fileNameArchiveRootsPreparedCheckpoint)
pending, err := readRootListCheckpointData(pendingFile)
if err == nil && pending.Checkpoint == checkpoint {
meta = pending
if err := os.Rename(pendingFile, committedFile); err != nil {
return err
}
}
}

Expand Down
107 changes: 97 additions & 10 deletions go/database/mpt/archive_trie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"context"
"errors"
"fmt"
"github.com/Fantom-foundation/Carmen/go/database/mpt/shared"
"math/rand"
"os"
"path/filepath"
Expand All @@ -29,14 +28,13 @@ import (
"time"

"github.com/Fantom-foundation/Carmen/go/backend/archive"
"github.com/Fantom-foundation/Carmen/go/backend/utils"
"github.com/Fantom-foundation/Carmen/go/backend/utils/checkpoint"
"github.com/Fantom-foundation/Carmen/go/common"
"github.com/Fantom-foundation/Carmen/go/common/amount"
"golang.org/x/exp/maps"

"github.com/Fantom-foundation/Carmen/go/database/mpt/shared"
"go.uber.org/mock/gomock"

"github.com/Fantom-foundation/Carmen/go/backend/utils"
"github.com/Fantom-foundation/Carmen/go/common"
"golang.org/x/exp/maps"
)

// Note: most properties of the ArchiveTrie are tested through the common
Expand Down Expand Up @@ -3094,6 +3092,30 @@ func TestRootList_GuaranteeCheckpoint_CommitsPendingCheckpoint(t *testing.T) {
}
}

// TestRootList_Prepare_FlushesRootsToDisk verifies that preparing a checkpoint
// raises the root list's numRootsInFile counter from zero to the number of
// roots that were appended beforehand.
func TestRootList_Prepare_FlushesRootsToDisk(t *testing.T) {
	list, err := loadRoots(t.TempDir())
	if err != nil {
		t.Fatalf("failed to load roots: %v", err)
	}

	// expectInFile aborts the test unless the list reports exactly want
	// roots as being present in its file.
	expectInFile := func(want int) {
		t.Helper()
		if got := list.numRootsInFile; want != got {
			t.Fatalf("unexpected number of roots in file, wanted %d, got %d", want, got)
		}
	}

	list.append(Root{NodeRef: NewNodeReference(ValueId(1))})
	list.append(Root{NodeRef: NewNodeReference(ValueId(2))})

	// After appending only, no root is expected to be counted as stored yet.
	expectInFile(0)

	if err := list.Prepare(checkpoint.Checkpoint(1)); err != nil {
		t.Fatalf("failed to prepare checkpoint: %v", err)
	}

	// After preparing the checkpoint, both roots are expected to be counted.
	expectInFile(2)
}

func TestRootList_Prepare_OnlyAcceptsIncrementalCheckpoints(t *testing.T) {
dir := t.TempDir()
roots, err := loadRoots(dir)
Expand All @@ -3107,6 +3129,28 @@ func TestRootList_Prepare_OnlyAcceptsIncrementalCheckpoints(t *testing.T) {
}
}

// TestRootList_Prepare_DetectsFlushIssue verifies that Prepare returns an
// error when the roots file has been made read-only while a newly appended
// root is still waiting to be stored.
func TestRootList_Prepare_DetectsFlushIssue(t *testing.T) {
	list, err := loadRoots(t.TempDir())
	if err != nil {
		t.Fatalf("failed to load roots: %v", err)
	}

	// Store a first root before changing the file permissions; presumably
	// this is what creates the roots file that is made read-only below.
	list.append(Root{NodeRef: NewNodeReference(ValueId(1))})
	if err := list.storeRoots(); err != nil {
		t.Fatalf("failed to store roots: %v", err)
	}

	// Revoke write access so that a later attempt to store roots fails.
	if err := os.Chmod(list.filename, 0400); err != nil {
		t.Fatalf("failed to make roots file read-only: %v", err)
	}

	// Add another root that has not been stored yet, then prepare.
	list.append(Root{NodeRef: NewNodeReference(ValueId(1))})
	if list.Prepare(checkpoint.Checkpoint(1)) == nil {
		t.Fatalf("expected error when roots could not be flushed to disk")
	}
}

func TestRootList_Prepare_FailsOnIOError(t *testing.T) {
dir := t.TempDir()
roots, err := loadRoots(dir)
Expand Down Expand Up @@ -3208,25 +3252,38 @@ func TestRootList_Restore_CanRecoverCorruptedRoots(t *testing.T) {
for _, name := range []string{"prepared", "committed"} {
t.Run(name, func(t *testing.T) {
dir := t.TempDir()
committedFile := filepath.Join(dir, fileNameArchiveRootsCheckpointDirectory, fileNameArchiveRootsCommittedCheckpoint)
pendingFile := filepath.Join(dir, fileNameArchiveRootsCheckpointDirectory, fileNameArchiveRootsPreparedCheckpoint)

roots, err := loadRoots(dir)
if err != nil {
t.Fatalf("failed to load roots: %v", err)
}
roots.append(Root{NodeRef: NewNodeReference(ValueId(123))})
roots.append(Root{NodeRef: NewNodeReference(ValueId(123))})

if err := roots.storeRoots(); err != nil {
t.Fatalf("failed to store roots: %v", err)
}

cp := checkpoint.Checkpoint(1)
if err := roots.Prepare(cp); err != nil {
t.Fatalf("failed to prepare checkpoint: %v", err)
}

if _, err := os.Stat(committedFile); !errors.Is(err, os.ErrNotExist) {
t.Errorf("expected committed checkpoint file not to exist")
}
if _, err := os.Stat(pendingFile); err != nil {
t.Errorf("expected pending checkpoint file to exist")
}

if name == "committed" {
if err := roots.Commit(cp); err != nil {
t.Fatalf("failed to commit checkpoint: %v", err)
}
if _, err := os.Stat(committedFile); err != nil {
t.Errorf("expected committed checkpoint file to exist")
}
if _, err := os.Stat(pendingFile); !errors.Is(err, os.ErrNotExist) {
t.Errorf("expected pending checkpoint file to be deleted")
}
}

backup, err := os.ReadFile(roots.filename)
Expand All @@ -3251,6 +3308,13 @@ func TestRootList_Restore_CanRecoverCorruptedRoots(t *testing.T) {
if !bytes.Equal(backup, restored) {
t.Fatalf("unexpected restored file content")
}

if _, err := os.Stat(committedFile); err != nil {
t.Errorf("expected committed checkpoint file to exist")
}
if _, err := os.Stat(pendingFile); !errors.Is(err, os.ErrNotExist) {
t.Errorf("expected pending checkpoint file to be deleted")
}
})
}
}
Expand All @@ -3273,6 +3337,29 @@ func TestRootList_Restore_FailsIfCheckpointFileCanNotBeRead(t *testing.T) {
}
}

// TestRootList_Restore_FailsIfPendingCheckpointFileCanNotBeRenamed verifies
// that restoring a checkpoint which was prepared but never committed reports
// an error when the checkpoint directory is read-only.
func TestRootList_Restore_FailsIfPendingCheckpointFileCanNotBeRenamed(t *testing.T) {
	archiveDir := t.TempDir()

	list, err := loadRoots(archiveDir)
	if err != nil {
		t.Fatalf("failed to load roots: %v", err)
	}

	// Only prepare the checkpoint; it is deliberately never committed.
	target := checkpoint.Checkpoint(1)
	if err := list.Prepare(target); err != nil {
		t.Fatalf("failed to prepare checkpoint: %v", err)
	}

	// sabotage the renaming of the pending to the committed checkpoint
	if err := os.Chmod(list.directory, 0500); err != nil {
		t.Fatalf("failed to make directory read-only: %v", err)
	}
	// Give write access back once the test function returns.
	defer os.Chmod(list.directory, 0700)

	restorer := getRootListRestorer(archiveDir)
	if restorer.Restore(target) == nil {
		t.Errorf("expected recovery error due to read-only directory")
	}
}

func TestRootList_Restore_FailsIfAskedToRecoverUnknownCheckpoint(t *testing.T) {
dir := t.TempDir()

Expand Down

0 comments on commit b63bb16

Please sign in to comment.