From 78440771ffebec2fa7aa9fa16763c463696e1353 Mon Sep 17 00:00:00 2001 From: HuijingHei Date: Thu, 19 Dec 2024 10:47:33 +0800 Subject: [PATCH] tests: add ostree.sync test Add test for https://issues.redhat.com/browse/OCPBUGS-15917, to verify ostree can sync the filesystem with the disconnected network volume(NFS). As we do not have ceph for testing, according to the suggestion from Colin and Joseph: `use something like NFS, we should in theory see the same error if we disconnected the NFS volume and we could not sync the filesystem.` --- mantle/kola/tests/ostree/sync.go | 246 +++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 mantle/kola/tests/ostree/sync.go diff --git a/mantle/kola/tests/ostree/sync.go b/mantle/kola/tests/ostree/sync.go new file mode 100644 index 0000000000..e9b1e39db1 --- /dev/null +++ b/mantle/kola/tests/ostree/sync.go @@ -0,0 +1,246 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ostree + +import ( + "fmt" + "strings" + "time" + + "github.com/coreos/coreos-assembler/mantle/kola" + "github.com/coreos/coreos-assembler/mantle/kola/cluster" + "github.com/coreos/coreos-assembler/mantle/kola/register" + "github.com/coreos/coreos-assembler/mantle/platform" + "github.com/coreos/coreos-assembler/mantle/platform/conf" + "github.com/coreos/coreos-assembler/mantle/platform/machine/qemu" + "github.com/coreos/coreos-assembler/mantle/util" +) + +// https://github.com/coreos/coreos-assembler/pull/3998#issuecomment-2589994641 +var nfs_server_butane = conf.Butane(`variant: fcos +version: 1.5.0 +storage: + directories: + - path: /var/nfs1/share + mode: 0777 + - path: /var/nfs2/share + mode: 0777 + - path: /var/nfs3/share + mode: 0777 + - path: /var/nfs4/share + mode: 0777 + - path: /var/nfs5/share + mode: 0777 + - path: /var/nfs6/share + mode: 0777 + files: + - path: "/etc/exports" + overwrite: true + contents: + inline: | + /var/nfs1/share *(rw,insecure,no_root_squash) + /var/nfs2/share *(rw,insecure,no_root_squash) + /var/nfs3/share *(rw,insecure,no_root_squash) + /var/nfs4/share *(rw,insecure,no_root_squash) + /var/nfs5/share *(rw,insecure,no_root_squash) + /var/nfs6/share *(rw,insecure,no_root_squash) + - path: "/var/lib/nfs/etab" +systemd: + units: + - name: "nfs-server.service" + enabled: true`) + +func init() { + register.RegisterTest(®ister.Test{ + // See https://github.com/ostreedev/ostree/pull/2968 + Run: ostreeSyncTest, + ClusterSize: 0, + Name: "ostree.sync", + Description: "Verify ostree can sync the filesystem with disconnected the NFS volume.", + Distros: []string{"rhcos"}, + Platforms: []string{"qemu"}, + Tags: []string{"ostree", kola.SkipBaseChecksTag, kola.NeedsInternetTag}, + }) +} + +// NFS server +type NfsServer struct { + Machine platform.Machine + MachineAddress string +} + +func setupNFSMachine(c cluster.TestCluster) NfsServer { + var m platform.Machine + var err error + var nfs_server string + + options := platform.QemuMachineOptions{ + HostForwardPorts: []platform.HostForwardPort{ + {Service: "ssh", HostPort: 0, GuestPort: 22}, + {Service: "nfs", HostPort: 2049, GuestPort: 2049}, + }, + } + options.MinMemory = 2048 + + // start the machine + switch c := c.Cluster.(type) { + // These cases have to be separated because when put together to the same case statement + // the golang compiler no longer checks that the individual types in the case have the + // NewMachineWithQemuOptions function, but rather whether platform.Cluster + // does which fails + case *qemu.Cluster: + m, err = c.NewMachineWithQemuOptions(nfs_server_butane, options) + nfs_server = "10.0.2.2" + default: + m, err = c.NewMachine(nfs_server_butane) + nfs_server = m.PrivateIP() + } + if err != nil { + c.Fatal(err) + } + + // Wait for nfs server to become active + err = util.Retry(6, 10*time.Second, func() error { + nfs_status := c.MustSSH(m, "systemctl is-active nfs-server.service") + if string(nfs_status) != "active" { + return fmt.Errorf("nfs-server.service is not ready: %s.", string(nfs_status)) + } + return nil + }) + if err != nil { + c.Fatalf("Timeout(1m) while waiting for nfs-server.service to be ready: %v", err) + } + return NfsServer{ + Machine: m, + MachineAddress: nfs_server, + } +} + +// Refer to the steps: +// https://issues.redhat.com/browse/ECOENGCL-91?focusedId=26272587&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-26272587 +func ostreeSyncTest(c cluster.TestCluster) { + // Start nfs server machine + nfs_server := setupNFSMachine(c) + + // Start test machine + butane := conf.Butane(`variant: fcos +version: 1.5.0 +storage: + directories: + - path: /var/tmp/data1 + mode: 0777 + - path: /var/tmp/data2 + mode: 0777 + - path: /var/tmp/data3 + mode: 0777 + - path: /var/tmp/data4 + mode: 0777 + - path: /var/tmp/data5 + mode: 0777 + - path: /var/tmp/data6 + mode: 0777 + files: + - path: /etc/systemd/system.conf + overwrite: true + contents: + inline: | + [Manager] + DefaultTimeoutStopSec=5s`) + opts := platform.MachineOptions{ + MinMemory: 2048, + } + var nfs_client platform.Machine + var err error + + switch c := c.Cluster.(type) { + case *qemu.Cluster: + nfs_client, err = c.NewMachineWithOptions(butane, opts) + default: + nfs_client, err = c.NewMachine(butane) + } + if err != nil { + c.Fatalf("Unable to create test machine: %v", err) + } + + // Wait for test machine + err = util.Retry(6, 10*time.Second, func() error { + _ = c.MustSSHf(nfs_client, `for i in $(seq 6); do + sudo mount -t nfs4 %s:/var/nfs$i/share /var/tmp/data$i + done`, nfs_server.MachineAddress) + + mounts := c.MustSSH(nfs_client, "sudo df -Th | grep nfs | wc -l") + if string(mounts) != "6" { + c.Fatalf("Can not mount all nfs") + } + c.Log("Got NFS mount.") + return nil + }) + if err != nil { + c.Fatalf("Timeout(1m) to get nfs mount: %v", err) + } + + doSyncTest(c, nfs_client) +} + +func doSyncTest(c cluster.TestCluster, client platform.Machine) { + c.RunCmdSync(client, "sudo touch /var/tmp/data3/test") + // Continue write + go func() { + _, err := c.SSH(client, `for i in $(seq 6); do + (while sudo rm -f /var/tmp/data$i/test; do \ + for x in $(seq 6); do \ + set -x; \ + sudo dd if=/dev/urandom of=/var/tmp/data$i/test bs=4096 count=2048 conv=notrunc oflag=append &> /dev/null; \ + set +x; \ + sleep 0.5; \ + done; \ + done) & + done`) + if err != nil { + c.Fatalf("failed to run dd command: %v", err) + } + }() + + // Create a stage deploy using kargs while writing + c.RunCmdSync(client, "sudo rpm-ostree kargs --append=test=1") + + netdevices := c.MustSSH(client, "ls /sys/class/net | grep -v lo") + netdevice := string(netdevices) + if netdevice == "" { + c.Fatalf("failed to get net device") + } + c.Log("Set link down and rebooting.") + // Skip the error check as it is expected + cmd := fmt.Sprintf("sudo systemd-run sh -c 'ip link set %s down && sleep 5 && systemctl reboot'", netdevice) + _, err := c.SSH(client, cmd) + if err != nil { + c.Fatalf("failed to set down link and reboot: %v", err) + } + + err = util.Retry(8, 10*time.Second, func() error { + // Look for the kernel argument test=1 + kernelArguments, err := c.SSH(client, "cat /proc/cmdline") + if err != nil { + return fmt.Errorf("failed to read /proc/cmdline: %w", err) + } else if !strings.Contains(string(kernelArguments), "test=1") { + c.Fatalf("Not found test=1 in kernel argument after rebooted") + } + return nil + }) + if err != nil { + c.Fatalf("Unable to reboot machine: %v", err) + } + c.Log("Found test=1 in kernel argument after rebooted.") +}