From 71494016e3d1f5c576ecd02b5ace445046b750c3 Mon Sep 17 00:00:00 2001 From: Cyril Gaudin Date: Sat, 31 Dec 2022 14:09:16 +0100 Subject: [PATCH] fix: segfault on auto-revert with namespace (#1) fix https://github.com/hashicorp/levant/issues/426 --- levant/auto_revert.go | 27 ++++++++--------- test/acctest/acctest.go | 18 +++++++++--- test/acctest/deploy.go | 4 +++ test/deploy_test.go | 31 ++++++++++++++++++++ test/fixtures/deploy_namespace.nomad | 43 ++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 17 deletions(-) create mode 100644 test/fixtures/deploy_namespace.nomad diff --git a/levant/auto_revert.go b/levant/auto_revert.go index 4200838d4..257ba9372 100644 --- a/levant/auto_revert.go +++ b/levant/auto_revert.go @@ -3,40 +3,40 @@ package levant import ( "time" + "github.com/hashicorp/nomad/api" nomad "github.com/hashicorp/nomad/api" "github.com/rs/zerolog/log" ) -func (l *levantDeployment) autoRevert(jobID, depID *string) { +func (l *levantDeployment) autoRevert(dep *nomad.Deployment) { // Setup a loop in order to retry a race condition whereby Levant may query // the latest deployment (auto-revert dep) before it has been started. i := 0 for i := 0; i < 5; i++ { - - dep, _, err := l.nomad.Jobs().LatestDeployment(*jobID, nil) + revertDep, _, err := l.nomad.Jobs().LatestDeployment(dep.JobID, &api.QueryOptions{Namespace: dep.Namespace}) if err != nil { - log.Error().Msgf("levant/auto_revert: unable to query latest deployment of job %s", *jobID) + log.Error().Msgf("levant/auto_revert: unable to query latest deployment of job %s", dep.JobID) return } // Check whether we have got the original deployment ID as a return from // Nomad, and if so, continue the loop to try again. - if dep.ID == *depID { - log.Debug().Msgf("levant/auto_revert: auto-revert deployment not triggered for job %s, rechecking", *jobID) + if revertDep.ID == dep.ID { + log.Debug().Msgf("levant/auto_revert: auto-revert deployment not triggered for job %s, rechecking", dep.JobID) time.Sleep(1 * time.Second) continue } - log.Info().Msgf("levant/auto_revert: beginning deployment watcher for job %s", *jobID) - success := l.deploymentWatcher(dep.ID) + log.Info().Msgf("levant/auto_revert: beginning deployment watcher for job %s", dep.JobID) + success := l.deploymentWatcher(revertDep.ID) if success { - log.Info().Msgf("levant/auto_revert: auto-revert of job %s was successful", *jobID) + log.Info().Msgf("levant/auto_revert: auto-revert of job %s was successful", dep.JobID) break } else { - log.Error().Msgf("levant/auto_revert: auto-revert of job %s failed; POTENTIAL OUTAGE SITUATION", *jobID) - l.checkFailedDeployment(&dep.ID) + log.Error().Msgf("levant/auto_revert: auto-revert of job %s failed; POTENTIAL OUTAGE SITUATION", dep.JobID) + l.checkFailedDeployment(&revertDep.ID) break } } @@ -44,7 +44,7 @@ func (l *levantDeployment) autoRevert(jobID, depID *string) { // At this point we have not been able to get the latest deploymentID that // is different from the original so we can't perform auto-revert checking. if i == 5 { - log.Error().Msgf("levant/auto_revert: unable to check auto-revert of job %s", *jobID) + log.Error().Msgf("levant/auto_revert: unable to check auto-revert of job %s", dep.JobID) } } @@ -60,6 +60,7 @@ func (l *levantDeployment) checkAutoRevert(dep *nomad.Deployment) { for _, v := range dep.TaskGroups { if v.AutoRevert { revert = true + break } } @@ -68,7 +69,7 @@ func (l *levantDeployment) checkAutoRevert(dep *nomad.Deployment) { dep.JobID) // Run the levant autoRevert function. - l.autoRevert(&dep.JobID, &dep.ID) + l.autoRevert(dep) } else { log.Info().Msgf("levant/auto_revert: job %v is not in auto-revert; POTENTIAL OUTAGE SITUATION", dep.JobID) } diff --git a/test/acctest/acctest.go b/test/acctest/acctest.go index 8e8692ce4..eb689f083 100644 --- a/test/acctest/acctest.go +++ b/test/acctest/acctest.go @@ -12,6 +12,9 @@ type TestCase struct { // Steps are ran in order stopping on failure Steps []TestStep + // SetupFunc is called at before the steps + SetupFunc TestStateFunc + // CleanupFunc is called at the end of the TestCase CleanupFunc TestStateFunc } @@ -42,8 +45,9 @@ type TestStateFunc func(*TestState) error // TestState is the configuration for the TestCase type TestState struct { - JobName string - Nomad *nomad.Client + JobName string + Namespace string + Nomad *nomad.Client } // Test executes a single TestCase @@ -62,6 +66,12 @@ func Test(t *testing.T, c TestCase) { Nomad: nomad, } + if c.SetupFunc != nil { + if err := c.SetupFunc(state); err != nil { + t.Errorf("setup failed: %s", err) + } + } + for i, step := range c.Steps { stepNum := i + 1 @@ -102,7 +112,7 @@ func Test(t *testing.T, c TestCase) { // CleanupPurgeJob is a cleanup func to purge the TestCase job from Nomad func CleanupPurgeJob(s *TestState) error { - _, _, err := s.Nomad.Jobs().Deregister(s.JobName, true, nil) + _, _, err := s.Nomad.Jobs().Deregister(s.JobName, true, &nomad.WriteOptions{Namespace: s.Namespace}) return err } @@ -110,7 +120,7 @@ func CleanupPurgeJob(s *TestState) error { // the TestCase job matches the desired status func CheckDeploymentStatus(status string) TestStateFunc { return func(s *TestState) error { - deploy, _, err := s.Nomad.Jobs().LatestDeployment(s.JobName, nil) + deploy, _, err := s.Nomad.Jobs().LatestDeployment(s.JobName, &nomad.QueryOptions{Namespace: s.Namespace}) if err != nil { return err } diff --git a/test/acctest/deploy.go b/test/acctest/deploy.go index cf825d69e..ca05b2d5e 100644 --- a/test/acctest/deploy.go +++ b/test/acctest/deploy.go @@ -31,6 +31,10 @@ func (c DeployTestStepRunner) Run(s *TestState) error { return fmt.Errorf("error rendering template: %s", err) } + if job.Namespace != nil { + s.Namespace = *job.Namespace + } + cfg := &levant.DeployConfig{ Deploy: &structs.DeployConfig{ Canary: c.Canary, diff --git a/test/deploy_test.go b/test/deploy_test.go index f30e0e499..ccd72f3b8 100644 --- a/test/deploy_test.go +++ b/test/deploy_test.go @@ -1,9 +1,11 @@ package test import ( + "fmt" "testing" "github.com/hashicorp/levant/test/acctest" + "github.com/hashicorp/nomad/api" ) func TestDeploy_basic(t *testing.T) { @@ -20,6 +22,35 @@ func TestDeploy_basic(t *testing.T) { }) } +func TestDeploy_namespace(t *testing.T) { + acctest.Test(t, acctest.TestCase{ + SetupFunc: func(s *acctest.TestState) error { + if _, err := s.Nomad.Namespaces().Register(&api.Namespace{Name: "test"}, nil); err != nil { + return fmt.Errorf("could not create test namespace: %w", err) + } + return nil + }, + Steps: []acctest.TestStep{ + { + Runner: acctest.DeployTestStepRunner{ + FixtureName: "deploy_namespace.nomad", + }, + Check: acctest.CheckDeploymentStatus("successful"), + }, + }, + CleanupFunc: func(s *acctest.TestState) error { + if err := acctest.CleanupPurgeJob(s); err != nil { + return err + } + + if _, err := s.Nomad.Namespaces().Delete("test", nil); err != nil { + return fmt.Errorf("could not delete namespace test: %w", err) + } + return nil + }, + }) +} + func TestDeploy_driverError(t *testing.T) { acctest.Test(t, acctest.TestCase{ Steps: []acctest.TestStep{ diff --git a/test/fixtures/deploy_namespace.nomad b/test/fixtures/deploy_namespace.nomad new file mode 100644 index 000000000..a5ae5dde7 --- /dev/null +++ b/test/fixtures/deploy_namespace.nomad @@ -0,0 +1,43 @@ +# tests a healthy deployment + +job "[[.job_name]]" { + datacenters = ["dc1"] + type = "service" + namespace = "test" + update { + max_parallel = 1 + min_healthy_time = "10s" + healthy_deadline = "1m" + auto_revert = true + } + + group "test" { + count = 1 + restart { + attempts = 10 + interval = "5m" + delay = "25s" + mode = "delay" + } + ephemeral_disk { + size = 300 + } + task "alpine" { + driver = "docker" + config { + image = "alpine" + command = "tail" + args = [ + "-f", "/dev/null" + ] + } + resources { + cpu = 100 + memory = 128 + network { + mbits = 10 + } + } + } + } +}