Skip to content

Commit

Permalink
Modify Machine Creation flow to make sure node label is updated befor…
Browse files Browse the repository at this point in the history
…e initialization of VM. Modify Deletion flow to call DeleteMachine even if VM is not found. (#940)

* Change order in which labels are updated and machine is initialized

* Change how node label is updated in getVMStatus. Proceed to drain if VM not found

* Remove error log when initializeMachine is unimplemented

* Change where initializeMachine is called in triggerCreationFlow

* Correct GetVMStatus flow from previous commit

* Remove unnecessary comments

* Initialize VM and update labels in the same reconciliation

* Fix make check errors

* Address review comments

* Address review comments part 2

* Add comment for future changes to triggerCreationFlow to clean up dirty code

* Correct err variable assignment

* Shift comment to the right place
  • Loading branch information
thiyyakat authored Sep 13, 2024
1 parent da00fec commit 9bded0e
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 126 deletions.
7 changes: 2 additions & 5 deletions pkg/util/provider/driver/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,14 @@ func (d *FakeDriver) DeleteMachine(_ context.Context, deleteMachineRequest *Dele

// GetMachineStatus makes a gRPC call to the driver to check existance of machine
func (d *FakeDriver) GetMachineStatus(_ context.Context, _ *GetMachineStatusRequest) (*GetMachineStatusResponse, error) {
switch {
case !d.VMExists:
if !d.VMExists {
errMessage := "Fake plugin is returning no VM instances backing this machine object"
return nil, status.Error(codes.NotFound, errMessage)
case d.Err != nil:
return nil, d.Err
}
return &GetMachineStatusResponse{
ProviderID: d.ProviderID,
NodeName: d.NodeName,
}, nil
}, d.Err
}

// ListMachines have to list machines
Expand Down
98 changes: 48 additions & 50 deletions pkg/util/provider/machinecontroller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,6 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
Secret: createMachineRequest.Secret,
},
)

if err != nil {
// VM with required name is not found.
machineErr, ok := status.FromError(err)
Expand All @@ -387,7 +386,6 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
return machineutils.MediumRetry, err
}
klog.Warningf("For machine %q, obtained VM error status as: %s", machineName, machineErr)

// Decoding machine error code
switch machineErr.Code() {
case codes.NotFound, codes.Unimplemented:
Expand All @@ -404,13 +402,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error())
return c.machineCreateErrorHandler(ctx, machine, createMachineResponse, err)
}

nodeName = createMachineResponse.NodeName
providerID = createMachineResponse.ProviderID

// Creation was successful
klog.V(2).Infof("Created new VM for machine: %q with ProviderID: %q and backing node: %q", machine.Name, providerID, getNodeName(machine))

// If a node obj already exists by the same nodeName, treat it as a stale node and trigger machine deletion.
// TODO: there is a case with Azure where the VM may join the cluster before the CreateMachine call is completed,
// and that would make an otherwise healthy node, be marked as stale.
Expand Down Expand Up @@ -497,6 +492,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
case codes.Uninitialized:
uninitializedMachine = true
klog.Infof("VM instance associated with machine %s was created but not initialized.", machine.Name)
//clean me up. I'm dirty.
//TODO@thiyyakat add a pointer to a boolean variable indicating whether initialization has happened successfully.
nodeName = getMachineStatusResponse.NodeName
providerID = getMachineStatusResponse.ProviderID

default:
updateRetryPeriod, updateErr := c.machineStatusUpdate(
Expand Down Expand Up @@ -527,43 +526,23 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
nodeName = getMachineStatusResponse.NodeName
providerID = getMachineStatusResponse.ProviderID
}

//Update labels, providerID
var clone *v1alpha1.Machine
clone, err = c.updateLabels(ctx, createMachineRequest.Machine, nodeName, providerID)
//initialize VM if not initialized
if uninitializedMachine {
retryPeriod, err := c.initializeMachine(ctx, createMachineRequest.Machine, createMachineRequest.MachineClass, createMachineRequest.Secret)
var retryPeriod machineutils.RetryPeriod
retryPeriod, err = c.initializeMachine(ctx, clone, createMachineRequest.MachineClass, createMachineRequest.Secret)
if err != nil {
return retryPeriod, err
}
// Return error even when machine object is updated
err = fmt.Errorf("machine creation in process. Machine initialization (if required) is successful")
return machineutils.ShortRetry, err
}

_, machineNodeLabelPresent := createMachineRequest.Machine.Labels[v1alpha1.NodeLabelKey]
_, machinePriorityAnnotationPresent := createMachineRequest.Machine.Annotations[machineutils.MachinePriority]

if !machineNodeLabelPresent || !machinePriorityAnnotationPresent || machine.Spec.ProviderID == "" {
clone := machine.DeepCopy()
if clone.Labels == nil {
clone.Labels = make(map[string]string)
}
clone.Labels[v1alpha1.NodeLabelKey] = nodeName
if clone.Annotations == nil {
clone.Annotations = make(map[string]string)
}
if clone.Annotations[machineutils.MachinePriority] == "" {
clone.Annotations[machineutils.MachinePriority] = "3"
}

clone.Spec.ProviderID = providerID
_, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{})
if err != nil {
klog.Warningf("Machine UPDATE failed for %q. Retrying, error: %s", machine.Name, err)
} else {
klog.V(2).Infof("Machine labels/annotations UPDATE for %q", machine.Name)

// Return error even when machine object is updated
err = fmt.Errorf("Machine creation in process. Machine UPDATE successful")
}
if err != nil {
return machineutils.ShortRetry, err
}

if machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
clone := machine.DeepCopy()
clone.Status.LastOperation = v1alpha1.LastOperation{
Expand All @@ -577,23 +556,49 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
TimeoutActive: true,
LastUpdateTime: metav1.Now(),
}

_, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{})
if err != nil {
klog.Warningf("Machine/status UPDATE failed for %q. Retrying, error: %s", machine.Name, err)
} else {
klog.V(2).Infof("Machine/status UPDATE for %q during creation", machine.Name)

// Return error even when machine object is updated
err = fmt.Errorf("Machine creation in process. Machine/Status UPDATE successful")
err = fmt.Errorf("machine creation in process. Machine/Status UPDATE successful")
}

return machineutils.ShortRetry, err
}

return machineutils.LongRetry, nil
}

func (c *controller) updateLabels(ctx context.Context, machine *v1alpha1.Machine, nodeName, providerID string) (clone *v1alpha1.Machine, err error) {
machineNodeLabelPresent := metav1.HasLabel(machine.ObjectMeta, v1alpha1.NodeLabelKey)
machinePriorityAnnotationPresent := metav1.HasAnnotation(machine.ObjectMeta, machineutils.MachinePriority)
clone = machine.DeepCopy()
if !machineNodeLabelPresent || !machinePriorityAnnotationPresent || machine.Spec.ProviderID == "" {
if clone.Labels == nil {
clone.Labels = make(map[string]string)
}
clone.Labels[v1alpha1.NodeLabelKey] = nodeName
if clone.Annotations == nil {
clone.Annotations = make(map[string]string)
}
if clone.Annotations[machineutils.MachinePriority] == "" {
clone.Annotations[machineutils.MachinePriority] = "3"
}
clone.Spec.ProviderID = providerID
var updatedMachine *v1alpha1.Machine
updatedMachine, err = c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{})
if err != nil {
klog.Warningf("Machine labels/annotations UPDATE failed for %q. Will retry after VM initialization (if required), error: %s", machine.Name, err)
clone = machine.DeepCopy()
} else {
clone = updatedMachine
klog.V(2).Infof("Machine labels/annotations UPDATE for %q", clone.Name)
err = fmt.Errorf("machine creation in process. Machine labels/annotations update is successful")
}
}
return clone, err
}

func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Machine, machineClass *v1alpha1.MachineClass, secret *corev1.Secret) (machineutils.RetryPeriod, error) {
req := &driver.InitializeMachineRequest{
Machine: machine,
Expand All @@ -608,15 +613,11 @@ func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Ma
klog.Errorf("Cannot decode Driver error for machine %q: %s. Unexpected behaviour as Driver errors are expected to be of type status.Status", machine.Name, err)
return machineutils.LongRetry, err
}
klog.Errorf("Error occurred while initializing VM instance for machine %q: %s", machine.Name, err)
switch errStatus.Code() {
case codes.Unimplemented:
if errStatus.Code() == codes.Unimplemented {
klog.V(3).Infof("Provider does not support Driver.InitializeMachine - skipping VM instance initialization for %q.", machine.Name)
return 0, nil
case codes.NotFound:
klog.Warningf("No VM instance found for machine %q. Skipping VM instance initialization.", machine.Name)
return 0, nil
}
klog.Errorf("Error occurred while initializing VM instance for machine %q: %s", machine.Name, err)
updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
Expand All @@ -633,14 +634,11 @@ func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Ma
},
machine.Status.LastKnownState,
)

if updateErr != nil {
return updateRetryPeriod, updateErr
}

return machineutils.ShortRetry, err
}

klog.V(3).Infof("VM instance %q for machine %q was initialized", resp.ProviderID, machine.Name)
return 0, nil
}
Expand All @@ -661,7 +659,7 @@ func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineReque
return c.setMachineTerminationStatus(ctx, deleteMachineRequest)

case strings.Contains(machine.Status.LastOperation.Description, machineutils.GetVMStatus):
return c.getVMStatus(
return c.updateMachineStatusAndNodeLabel(
ctx,
&driver.GetMachineStatusRequest{
Machine: deleteMachineRequest.Machine,
Expand Down
11 changes: 6 additions & 5 deletions pkg/util/provider/machinecontroller/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ var _ = Describe("machine", func() {
ProviderID: "fakeID",
},
}, nil, nil, nil, map[string]string{v1alpha1.NodeLabelKey: "fakeNode-0"}, true, metav1.Now()),
err: fmt.Errorf("Machine creation in process. Machine UPDATE successful"),
err: fmt.Errorf("machine creation in process. Machine initialization (if required) is successful"),
retry: machineutils.ShortRetry,
},
}),
Expand Down Expand Up @@ -623,7 +623,7 @@ var _ = Describe("machine", func() {
true,
metav1.Now(),
),
err: fmt.Errorf("Machine creation in process. Machine/Status UPDATE successful"),
err: fmt.Errorf("machine creation in process. Machine/Status UPDATE successful"),
retry: machineutils.ShortRetry,
},
}),
Expand Down Expand Up @@ -1016,7 +1016,7 @@ var _ = Describe("machine", func() {
true,
metav1.Now(),
),
err: fmt.Errorf("Machine creation in process. Machine/Status UPDATE successful"),
err: fmt.Errorf("machine creation in process. Machine/Status UPDATE successful"),
retry: machineutils.ShortRetry,
},
}),
Expand Down Expand Up @@ -1068,6 +1068,7 @@ var _ = Describe("machine", func() {
Kind: "MachineClass",
Name: "machineClass",
},
ProviderID: "fakeID",
},
}, &v1alpha1.MachineStatus{
CurrentStatus: v1alpha1.CurrentStatus{
Expand All @@ -1079,7 +1080,7 @@ var _ = Describe("machine", func() {
State: v1alpha1.MachineStateFailed,
Type: v1alpha1.MachineOperationCreate,
},
}, nil, nil, nil, true, metav1.Now()),
}, nil, nil, map[string]string{v1alpha1.NodeLabelKey: "fakeNode-0"}, true, metav1.Now()),
err: status.Error(codes.Uninitialized, "VM instance could not be initialized"),
retry: machineutils.ShortRetry,
},
Expand Down Expand Up @@ -1503,7 +1504,7 @@ var _ = Describe("machine", func() {
},
},
expect: expect{
err: fmt.Errorf("Machine deletion in process. VM with matching ID found"),
err: fmt.Errorf("machine deletion in process. VM with matching ID found"),
retry: machineutils.ShortRetry,
machine: newMachine(
&v1alpha1.MachineTemplateSpec{
Expand Down
Loading

0 comments on commit 9bded0e

Please sign in to comment.