Skip to content

Commit

Permalink
K0sControlPlane update strategies
Browse files Browse the repository at this point in the history
Signed-off-by: Alexey Makhov <[email protected]>
  • Loading branch information
makhov committed May 14, 2024
1 parent 6b623d2 commit 4e1d2a8
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 96 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -230,4 +230,6 @@ jobs:
with:
name: smoketests-${{ matrix.smoke-suite }}-files
path: |
/tmp/${{ matrix.smoke-suite }}-k0smotron.log
/tmp/${{ matrix.smoke-suite }}-k0smotron-bootstrap.log
/tmp/${{ matrix.smoke-suite }}-k0smotron-control-plane.log
/tmp/${{ matrix.smoke-suite }}-k0smotron-infrastructure.log
4 changes: 4 additions & 0 deletions api/controlplane/v1beta1/k0s_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ type K0sControlPlaneSpec struct {
//+kubebuilder:validation:Optional
//+kubebuilder:default=1
Replicas int32 `json:"replicas,omitempty"`
//+kubebuilder:validation:Optional
//+kubebuilder:validation:Enum=rollout;inplace
//+kubebuilder:default=inplace
UpdateStrategy string `json:"updateStrategy,omitempty"`
// Version defines the k0s version to be deployed. You can use a specific k0s version (e.g. v1.27.1+k0s.0) or
// just the Kubernetes version (e.g. v1.27.1). If left empty, k0smotron will select one automatically.
//+kubebuilder:validation:Optional
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ spec:
default: 1
format: int32
type: integer
updateStrategy:
default: inplace
type: string
version:
description: |-
Version defines the k0s version to be deployed. You can use a specific k0s version (e.g. v1.27.1+k0s.0) or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ spec:
default: 1
format: int32
type: integer
updateStrategy:
default: inplace
type: string
version:
description: |-
Version defines the k0s version to be deployed. You can use a specific k0s version (e.g. v1.27.1+k0s.0) or
Expand Down
9 changes: 9 additions & 0 deletions docs/resource-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,15 @@ Resource Types:
<i>Default</i>: 1<br/>
</td>
<td>false</td>
</tr><tr>
<td><b>updateStrategy</b></td>
<td>string</td>
<td>
<br/>
<br/>
<i>Default</i>: inplace<br/>
</td>
<td>false</td>
</tr><tr>
<td><b>version</b></td>
<td>string</td>
Expand Down
79 changes: 40 additions & 39 deletions internal/controller/bootstrap/controlplane_bootstrap_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"sigs.k8s.io/cluster-api/util"
capiutil "sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/secret"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -179,14 +180,20 @@ func (c *ControlPlaneController) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, fmt.Errorf("control plane endpoint is not set")
}

if strings.HasSuffix(config.Name, "-0") {
machines, err := collections.GetFilteredMachinesForCluster(ctx, c.Client, cluster, collections.ControlPlaneMachines(cluster.Name), collections.ActiveMachines)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error collecting machines: %w", err)
}

if machines.Oldest().Name == config.Name {
files, err = c.genInitialControlPlaneFiles(ctx, scope, files)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error generating initial control plane files: %v", err)
}
installCmd = createCPInstallCmd(config)
} else {
files, err = c.genControlPlaneJoinFiles(ctx, scope, config, files)
files, err = c.genControlPlaneJoinFiles(ctx, scope, files, machines.Oldest())
//files, err = c.genControlPlaneJoinFiles(ctx, scope, config, files)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error generating control plane join files: %v", err)
}
Expand Down Expand Up @@ -289,7 +296,7 @@ func (c *ControlPlaneController) genInitialControlPlaneFiles(ctx context.Context
return files, nil
}

func (c *ControlPlaneController) genControlPlaneJoinFiles(ctx context.Context, scope *Scope, config *bootstrapv1.K0sControllerConfig, files []cloudinit.File) ([]cloudinit.File, error) {
func (c *ControlPlaneController) genControlPlaneJoinFiles(ctx context.Context, scope *Scope, files []cloudinit.File, firstControllerMachine *clusterv1.Machine) ([]cloudinit.File, error) {
log := log.FromContext(ctx).WithValues("K0sControllerConfig cluster", scope.Cluster.Name)

_, ca, err := c.getCerts(ctx, scope)
Expand All @@ -316,7 +323,7 @@ func (c *ControlPlaneController) genControlPlaneJoinFiles(ctx context.Context, s
return nil, err
}

host, err := c.findFirstControllerIP(ctx, config)
host, err := c.findFirstControllerIP(ctx, firstControllerMachine)
if err != nil {
log.Error(err, "Failed to get controller IP")
return nil, err
Expand Down Expand Up @@ -526,22 +533,9 @@ func createCPInstallCmdWithJoinToken(config *bootstrapv1.K0sControllerConfig, to
return strings.Join(installCmd, " ")
}

func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, config *bootstrapv1.K0sControllerConfig) (string, error) {
// Dirty first controller name generation
nameParts := strings.Split(config.Name, "-")
nameParts[len(nameParts)-1] = "0"
name := strings.Join(nameParts, "-")
machine, machineImpl, err := c.getMachineImplementation(ctx, name, config)
if err != nil {
return "", fmt.Errorf("error getting machine implementation: %w", err)
}
addresses, found, err := unstructured.NestedSlice(machineImpl.UnstructuredContent(), "status", "addresses")
if err != nil {
return "", err
}

func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, firstControllerMachine *clusterv1.Machine) (string, error) {
extAddr, intAddr := "", ""
for _, addr := range machine.Status.Addresses {
for _, addr := range firstControllerMachine.Status.Addresses {
if addr.Type == clusterv1.MachineExternalIP {
extAddr = addr.Address
break
Expand All @@ -552,16 +546,29 @@ func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, conf
}
}

if found {
for _, addr := range addresses {
addrMap, _ := addr.(map[string]interface{})
if addrMap["type"] == string(v1.NodeExternalIP) {
extAddr = addrMap["address"].(string)
break
}
if addrMap["type"] == string(v1.NodeInternalIP) {
intAddr = addrMap["address"].(string)
break
name := firstControllerMachine.Name

if extAddr == "" && intAddr == "" {
machineImpl, err := c.getMachineImplementation(ctx, firstControllerMachine)
if err != nil {
return "", fmt.Errorf("error getting machine implementation: %w", err)
}
addresses, found, err := unstructured.NestedSlice(machineImpl.UnstructuredContent(), "status", "addresses")
if err != nil {
return "", err
}

if found {
for _, addr := range addresses {
addrMap, _ := addr.(map[string]interface{})
if addrMap["type"] == string(v1.NodeExternalIP) {
extAddr = addrMap["address"].(string)
break
}
if addrMap["type"] == string(v1.NodeInternalIP) {
intAddr = addrMap["address"].(string)
break
}
}
}
}
Expand All @@ -577,13 +584,7 @@ func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, conf
return "", fmt.Errorf("no address found for machine %s", name)
}

func (c *ControlPlaneController) getMachineImplementation(ctx context.Context, name string, config *bootstrapv1.K0sControllerConfig) (*clusterv1.Machine, *unstructured.Unstructured, error) {
var machine clusterv1.Machine
err := c.Get(ctx, client.ObjectKey{Name: name, Namespace: config.Namespace}, &machine)
if err != nil {
return nil, nil, fmt.Errorf("error getting machine object: %w", err)
}

func (c *ControlPlaneController) getMachineImplementation(ctx context.Context, machine *clusterv1.Machine) (*unstructured.Unstructured, error) {
infRef := machine.Spec.InfrastructureRef

machineImpl := new(unstructured.Unstructured)
Expand All @@ -593,11 +594,11 @@ func (c *ControlPlaneController) getMachineImplementation(ctx context.Context, n

key := client.ObjectKey{Name: infRef.Name, Namespace: infRef.Namespace}

err = c.Get(ctx, key, machineImpl)
err := c.Get(ctx, key, machineImpl)
if err != nil {
return nil, nil, fmt.Errorf("error getting machine implementation object: %w", err)
return nil, fmt.Errorf("error getting machine implementation object: %w", err)
}
return &machine, machineImpl, nil
return machineImpl, nil
}

func genShutdownServiceFiles() []cloudinit.File {
Expand Down
145 changes: 97 additions & 48 deletions internal/controller/controlplane/k0s_controlplane_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,23 @@ import (
"context"
"errors"
"fmt"
"github.com/Masterminds/semver"
"github.com/google/uuid"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/utils/pointer"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
kubeadmbootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
capiutil "sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/secret"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -210,63 +213,42 @@ func (c *K0sController) reconcile(ctx context.Context, cluster *clusterv1.Cluste

func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane) (int32, error) {
replicasToReport := kcp.Spec.Replicas
// TODO: Scale down machines if needed
if kcp.Status.Replicas > kcp.Spec.Replicas {
kubeClient, err := c.getKubeClient(ctx, cluster)
if err != nil {
return replicasToReport, fmt.Errorf("error getting cluster client set for deletion: %w", err)
}

// Remove the last machine and report the new number of replicas to status
// On the next reconcile, the next machine will be removed
if kcp.Status.Replicas > kcp.Spec.Replicas {
// Wait for the previous machine to be deleted to avoid etcd issues
previousMachineName := machineName(kcp.Name, int(kcp.Status.Replicas))
exist, err := c.machineExist(ctx, previousMachineName, kcp)
if err != nil {
return kcp.Status.Replicas, fmt.Errorf("error checking machine existence: %w", err)
}
if exist {
return kcp.Status.Replicas, fmt.Errorf("waiting for previous machine to be deleted")
}

replicasToReport = kcp.Status.Replicas - 1
name := machineName(kcp.Name, int(kcp.Status.Replicas-1))

if err := c.markChildControlNodeToLeave(ctx, name, kubeClient); err != nil {
return replicasToReport, fmt.Errorf("error marking controlnode to leave: %w", err)
}

if err := c.deleteBootstrapConfig(ctx, name, kcp); err != nil {
return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
}

if err := c.deleteMachineFromTemplate(ctx, name, cluster, kcp); err != nil {
return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
}

if err := c.deleteMachine(ctx, name, kcp); err != nil {
return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
}
machines, err := collections.GetFilteredMachinesForCluster(ctx, c, cluster, collections.ControlPlaneMachines(cluster.Name), collections.ActiveMachines)
if err != nil {
return replicasToReport, fmt.Errorf("error collecting machines: %w", err)
}

return replicasToReport, nil
}
currentReplicas := machines.Len()
desiredReplicas := kcp.Spec.Replicas
machinesToDelete := 0
if currentReplicas > int(desiredReplicas) {
machinesToDelete = currentReplicas - int(desiredReplicas)
replicasToReport = kcp.Status.Replicas
}

if kcp.Status.Version != "" && kcp.Spec.Version != kcp.Status.Version {
kubeClient, err := c.getKubeClient(ctx, cluster)
if err != nil {
return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
}
if kcp.Spec.UpdateStrategy == "" {
desiredReplicas += kcp.Spec.Replicas
machinesToDelete = int(kcp.Spec.Replicas)
replicasToReport = desiredReplicas
} else {
kubeClient, err := c.getKubeClient(ctx, cluster)
if err != nil {
return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
}

err = c.createAutopilotPlan(ctx, kcp, kubeClient)
if err != nil {
return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
err = c.createAutopilotPlan(ctx, kcp, kubeClient)
if err != nil {
return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
}
}
}

for i := 0; i < int(kcp.Spec.Replicas); i++ {
name := machineName(kcp.Name, i)
for i := currentReplicas; i < int(desiredReplicas); i++ {
name := names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-%d", kcp.Name, i))
//for i := 0; i < int(kcp.Spec.Replicas); i++ {
// name := machineName(kcp.Name, i)

machineFromTemplate, err := c.createMachineFromTemplate(ctx, name, cluster, kcp)
if err != nil {
Expand All @@ -291,6 +273,73 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
}
}

var isNewMachineReady bool
for _, m := range machines {
ver := semver.MustParse(kcp.Spec.Version)
if m.Spec.Version != nil && *m.Spec.Version == fmt.Sprintf("%d.%d.%d", ver.Major(), ver.Minor(), ver.Patch()) {
continue
}
if m.Status.Phase == string(clusterv1.MachinePhaseProvisioning) ||
m.Status.Phase == string(clusterv1.MachinePhaseProvisioned) ||
m.Status.Phase == string(clusterv1.MachinePhaseRunning) {
isNewMachineReady = true
break
}
}

if machinesToDelete > 0 && !isNewMachineReady {
return replicasToReport, fmt.Errorf("waiting for new machines")
}

//if kcp.Status.Version != "" && kcp.Spec.Version != kcp.Status.Version {
// kubeClient, err := c.getKubeClient(ctx, cluster)
// if err != nil {
// return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
// }
//
// err = c.createAutopilotPlan(ctx, kcp, kubeClient)
// if err != nil {
// return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
// }
//}

// TODO: Scale down machines if needed
//if kcp.Status.Replicas > kcp.Spec.Replicas {
if machinesToDelete > 0 {
kubeClient, err := c.getKubeClient(ctx, cluster)
if err != nil {
return replicasToReport, fmt.Errorf("error getting cluster client set for deletion: %w", err)
}

// Remove the last machine and report the new number of replicas to status
// On the next reconcile, the next machine will be removed
// Wait for the previous machine to be deleted to avoid etcd issues
machine := machines.Oldest()
if machine.Status.Phase == string(clusterv1.MachinePhaseDeleting) {
return kcp.Status.Replicas, fmt.Errorf("waiting for previous machine to be deleted")
}

replicasToReport -= 1

Check warning on line 322 in internal/controller/controlplane/k0s_controlplane_controller.go

View workflow job for this annotation

GitHub Actions / Lint

increment-decrement: should replace replicasToReport -= 1 with replicasToReport-- (revive)
name := machine.Name
if err := c.markChildControlNodeToLeave(ctx, name, kubeClient); err != nil {
return replicasToReport, fmt.Errorf("error marking controlnode to leave: %w", err)
}

if err := c.deleteBootstrapConfig(ctx, name, kcp); err != nil {
return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
}

if err := c.deleteMachineFromTemplate(ctx, name, cluster, kcp); err != nil {
return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
}

if err := c.deleteMachine(ctx, name, kcp); err != nil {
return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
}

return replicasToReport, nil
}

return replicasToReport, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ func (r *RemoteMachineController) returnMachineToPool(ctx context.Context, rm *i
}
}
log := log.FromContext(ctx).WithValues("remotemachine", rm.Name)
log.Error(fmt.Errorf("no pooled machine found for remote machine"), rm.Namespace, rm.Name)
log.Error(fmt.Errorf("no pooled machine found for remote machine"), "failed to find pooled remote machine", "namespace", rm.Namespace, "name", rm.Name)

return nil
}
Expand Down
Loading

0 comments on commit 4e1d2a8

Please sign in to comment.