Skip to content

Commit

Permalink
Reconcile machine before creation deadline
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronfern committed Sep 21, 2023
1 parent 5b9a383 commit 7821503
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 7 deletions.
7 changes: 5 additions & 2 deletions pkg/util/provider/machinecontroller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,15 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
}
}
if machine.Spec.ProviderID == "" || machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
return c.triggerCreationFlow(
retry, err = c.triggerCreationFlow(
ctx,
&driver.CreateMachineRequest{
Machine: machine,
MachineClass: machineClass,
Secret: &corev1.Secret{Data: secretData},
},
)
return c.adjustCreateRetryRequired(machine, retry), err
}

return machineutils.LongRetry, nil
Expand Down Expand Up @@ -353,7 +354,9 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
// If node label is not present
klog.V(2).Infof("Creating a VM for machine %q, please wait!", machine.Name)
klog.V(2).Infof("The machine creation is triggered with timeout of %s", c.getEffectiveCreationTimeout(createMachineRequest.Machine).Duration)
createMachineResponse, err := c.driver.CreateMachine(ctx, createMachineRequest)
createMCCtx, cancelFunc := c.getCreationContext(ctx, machine)
defer cancelFunc()
createMachineResponse, err := c.driver.CreateMachine(createMCCtx, createMachineRequest)
if err != nil {
// Create call returned an error.
klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error())
Expand Down
30 changes: 25 additions & 5 deletions pkg/util/provider/machinecontroller/machine_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ func SyncMachineTaints(
return toBeUpdated
}

// machineCreateErrorHandler TODO
// machineCreateErrorHandler handles errors when machine creation does not succeed
func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1alpha1.Machine, createMachineResponse *driver.CreateMachineResponse, err error) (machineutils.RetryPeriod, error) {
var (
retryRequired = machineutils.MediumRetry
Expand All @@ -491,7 +491,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
lastKnownState = createMachineResponse.LastKnownState
}

c.machineStatusUpdate(
statusUpdErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
Expand All @@ -507,8 +507,23 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
},
lastKnownState,
)
return retryRequired, statusUpdErr
}

// adjustCreateRetryRequired adjusts the retry period if needed so that we don't have a case where the machine is reconciled after machineCreationTimeout
// if the retry period is too large so that, it is adjusted so that it causes a reconcile at the machineCreationTimeout
// if the machineCreationTimeout has already passed, return `ShortRetry` so that the machine is immediately reconciled
func (c *controller) adjustCreateRetryRequired(machine *v1alpha1.Machine, retryRequired machineutils.RetryPeriod) machineutils.RetryPeriod {
adjustedRetry := retryRequired
machineCreationDeadline := machine.CreationTimestamp.Time.Add(c.getEffectiveCreationTimeout(machine).Duration)
if time.Now().After(machineCreationDeadline) {
adjustedRetry = machineutils.ShortRetry
} else if time.Now().Add(time.Duration(retryRequired)).After(machineCreationDeadline) {
// Machine will reconcile after create deadline. Adapt RetryPeriod to reconcile machine at deadline
adjustedRetry = machineutils.RetryPeriod(machineCreationDeadline.Sub(time.Now()))
}

return retryRequired, nil
return adjustedRetry
}

func (c *controller) machineStatusUpdate(
Expand Down Expand Up @@ -974,7 +989,7 @@ func isConditionEmpty(condition v1.NodeCondition) bool {
return condition == v1.NodeCondition{}
}

// initializes err and description with the passed string message
// printLogInitError initializes err and description with the passed string message
func printLogInitError(s string, err *error, description *string, machine *v1alpha1.Machine) {
klog.Warningf(s+" machine: %q ", machine.Name)
*err = fmt.Errorf(s+" %s", machineutils.InitiateVMDeletion)
Expand Down Expand Up @@ -1304,7 +1319,7 @@ func (c *controller) getEffectiveHealthTimeout(machine *v1alpha1.Machine) *metav
return effectiveHealthTimeout
}

// getEffectiveHealthTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag.
// getEffectiveCreationTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag.
func (c *controller) getEffectiveCreationTimeout(machine *v1alpha1.Machine) *metav1.Duration {
var effectiveCreationTimeout *metav1.Duration
if machine.Spec.MachineConfiguration != nil && machine.Spec.MachineConfiguration.MachineCreationTimeout != nil {
Expand All @@ -1326,6 +1341,11 @@ func (c *controller) getEffectiveNodeConditions(machine *v1alpha1.Machine) *stri
return effectiveNodeConditions
}

func (c *controller) getCreationContext(parentCtx context.Context, machine *v1alpha1.Machine) (context.Context, context.CancelFunc) {
timeOutDuration := c.getEffectiveCreationTimeout(machine).Duration
return context.WithDeadline(parentCtx, machine.CreationTimestamp.Time.Add(timeOutDuration))
}

// UpdateNodeTerminationCondition updates termination condition on the node object
func (c *controller) UpdateNodeTerminationCondition(ctx context.Context, machine *v1alpha1.Machine) error {
if machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
Expand Down
67 changes: 67 additions & 0 deletions pkg/util/provider/machinecontroller/machine_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2725,4 +2725,71 @@ var _ = Describe("machine_util", func() {
}),
)
})

Describe("#Adjust Machine Create Retry Timeout", func() {
type setup struct {
machine *machinev1.Machine
mcCreateTimeout metav1.Duration
createOpRetry machineutils.RetryPeriod
}

type expect struct {
retryAdjusted bool
}

type data struct {
setup setup
expect expect
}

DescribeTable("##table",
func(data *data) {
stop := make(chan struct{})
defer close(stop)

c, trackers := createController(stop, testNamespace, nil, nil, nil, nil)
defer trackers.Stop()

c.safetyOptions.MachineCreationTimeout = data.setup.mcCreateTimeout

adjustedRetry := c.adjustCreateRetryRequired(data.setup.machine, machineutils.RetryPeriod(data.setup.createOpRetry))

if data.expect.retryAdjusted {
Expect(adjustedRetry).Should(BeNumerically("<=", machineutils.RetryPeriod(c.getEffectiveCreationTimeout(data.setup.machine).Duration)))
} else {
Expect(adjustedRetry).To(Equal(data.setup.createOpRetry))
}
},
Entry("Should not adjust machine create retry period if retry is set to occur before machine create deadline", &data{
setup: setup{
createOpRetry: machineutils.RetryPeriod(3 * time.Minute),
mcCreateTimeout: metav1.Duration{Duration: 20 * time.Minute},
machine: newMachine(
&machinev1.MachineTemplateSpec{
Spec: machinev1.MachineSpec{},
},
&machinev1.MachineStatus{},
nil, nil, map[string]string{v1alpha1.NodeLabelKey: "test-node-0"}, true, metav1.Now()),
},
expect: expect{
retryAdjusted: false,
},
}),
Entry("Should adjust machine create retry period to machine create deadline if retry is set to occur after machine create deadline", &data{
setup: setup{
createOpRetry: machineutils.RetryPeriod(4 * time.Minute),
mcCreateTimeout: metav1.Duration{Duration: 3 * time.Minute},
machine: newMachine(
&machinev1.MachineTemplateSpec{
Spec: machinev1.MachineSpec{},
},
&machinev1.MachineStatus{},
nil, nil, map[string]string{v1alpha1.NodeLabelKey: "test-node-0"}, true, metav1.Now()),
},
expect: expect{
retryAdjusted: true,
},
}),
)
})
})

0 comments on commit 7821503

Please sign in to comment.