From 35ffe71768a2bb51e83a42f766de7894391b60c8 Mon Sep 17 00:00:00 2001
From: Colin Walters
Date: Wed, 14 Oct 2020 20:42:37 -0400
Subject: [PATCH] node-controller: Support an annotation to hold/prioritize
 updates

Today the MCO arbitrarily chooses a node to update from the candidates.
We want to allow admins to avoid specific nodes entirely (for as long
as they want) as well as guide upgrade ordering.

This replaces the defunct etcd-specific code with support for a generic
annotation `machineconfiguration.openshift.io/update-order` that allows
an external controller (and/or human) to do both of these.

Setting it to `0` will entirely skip that node for updates. Otherwise,
higher values are preferred.

Closes: https://github.com/openshift/machine-config-operator/issues/2059
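
For example (node names here are purely illustrative), an admin could
hold one node back indefinitely and bump another to the front of the
queue with:

    oc annotate node/node-a machineconfiguration.openshift.io/update-order=0
    oc annotate node/node-b machineconfiguration.openshift.io/update-order=10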
---
 pkg/controller/node/node_controller.go      | 65 +++++++++++----------
 pkg/controller/node/node_controller_test.go | 17 ++++++
 pkg/controller/node/status_test.go          | 10 ++++
 pkg/daemon/constants/constants.go           |  2 +
 4 files changed, 62 insertions(+), 32 deletions(-)

diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go
index a8c627c815..d05abb303b 100644
--- a/pkg/controller/node/node_controller.go
+++ b/pkg/controller/node/node_controller.go
@@ -5,6 +5,8 @@ import (
 	"encoding/json"
 	"fmt"
 	"reflect"
+	"sort"
+	"strconv"
 	"time"
 
 	"github.com/golang/glog"
@@ -866,7 +868,7 @@ func getAllCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*core
 	}
 	capacity -= failingThisConfig
 
-	return nodes, uint(capacity)
+	return prioritizeCandidateNodes(nodes), uint(capacity)
 }
 
 // getCandidateMachines returns the maximum subset of nodes which can be updated to the target config given availability constraints.
@@ -878,46 +880,45 @@ func getCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.
 	return nodes[:capacity]
 }
 
-// getCurrentEtcdLeader is not yet implemented
-func (ctrl *Controller) getCurrentEtcdLeader(candidates []*corev1.Node) (*corev1.Node, error) {
-	return nil, nil
+type nodeOrdering struct {
+	nodes []*corev1.Node
+	order map[*corev1.Node]int
 }
 
-// filterControlPlaneCandidateNodes adjusts the candidates and capacity specifically
-// for the control plane, e.g. based on which node is the etcd leader at the time.
-// nolint:unparam
-func (ctrl *Controller) filterControlPlaneCandidateNodes(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) ([]*corev1.Node, uint, error) {
-	if len(candidates) <= 1 {
-		return candidates, capacity, nil
-	}
-	etcdLeader, err := ctrl.getCurrentEtcdLeader(candidates)
-	if err != nil {
-		glog.Warningf("Failed to find current etcd leader (continuing anyways): %v", err)
-	}
-	var newCandidates []*corev1.Node
+func (p nodeOrdering) Len() int           { return len(p.nodes) }
+func (p nodeOrdering) Less(i, j int) bool { return p.order[p.nodes[i]] > p.order[p.nodes[j]] }
+func (p nodeOrdering) Swap(i, j int)      { p.nodes[i], p.nodes[j] = p.nodes[j], p.nodes[i] }
+
+// prioritizeCandidateNodes chooses the update ordering
+func prioritizeCandidateNodes(candidates []*corev1.Node) []*corev1.Node {
+	var newCandidates nodeOrdering
+	newCandidates.order = make(map[*corev1.Node]int)
 	for _, node := range candidates {
-		if node == etcdLeader {
-			// For now make this an event so we know it's working, even though it's more of a non-event
-			ctrl.eventRecorder.Eventf(pool, corev1.EventTypeNormal, "DeferringEtcdLeaderUpdate", "Deferring update of etcd leader %s", node.Name)
-			glog.Infof("Deferring update of etcd leader: %s", node.Name)
-			continue
+		orderVal, ok := node.Annotations[daemonconsts.MachineUpdateOrderingAnnotationKey]
+		if ok {
+			order, err := strconv.Atoi(orderVal)
+			if err != nil {
+				glog.Warningf("Failed to parse %s %s: %v", node.Name, daemonconsts.MachineUpdateOrderingAnnotationKey, err)
+				continue
+			}
+			// order 0 means "skip this node"
+			if order == 0 {
+				continue
+			}
+			if order < 0 {
+				glog.Warningf("Invalid negative ordering %s for node %s", orderVal, node.Name)
+				continue
+			}
+			newCandidates.order[node] = order
 		}
-		newCandidates = append(newCandidates, node)
+		newCandidates.nodes = append(newCandidates.nodes, node)
 	}
-	return newCandidates, capacity, nil
+	sort.Sort(newCandidates)
+	return newCandidates.nodes
 }
 
 // updateCandidateMachines sets the desiredConfig annotation the candidate machines
 func (ctrl *Controller) updateCandidateMachines(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) error {
-	if pool.Name == masterPoolName {
-		var err error
-		candidates, capacity, err = ctrl.filterControlPlaneCandidateNodes(pool, candidates, capacity)
-		if err != nil {
-			return err
-		}
-		// In practice right now these counts will be 1 but let's stay general to support 5 etcd nodes in the future
-		ctrl.logPool(pool, "filtered to %d candidate nodes for update, capacity: %d", len(candidates), capacity)
-	}
 	if capacity < uint(len(candidates)) {
 		// Arbitrarily pick the first N candidates; no attempt at sorting.
 		// Perhaps later we allow admins to weight somehow, or do something more intelligent.
diff --git a/pkg/controller/node/node_controller_test.go b/pkg/controller/node/node_controller_test.go
index 353dacc243..38e8cb5ffa 100644
--- a/pkg/controller/node/node_controller_test.go
+++ b/pkg/controller/node/node_controller_test.go
@@ -632,6 +632,23 @@ func TestGetCandidateMachines(t *testing.T) {
 		expected:        []string{"node-3", "node-4"},
 		otherCandidates: []string{"node-5", "node-6"},
 		capacity:        2,
+	}, {
+		// A test with more nodes in mixed order
+		progress: 4,
+		nodes: []*corev1.Node{
+			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-1", "v1", "v1", corev1.ConditionFalse),
+			newNodeWithReady("node-2", "v0", "v1", corev1.ConditionTrue),
+			newNodeWithReadyAndOrder("node-3", "v0", "v0", 0, corev1.ConditionTrue),
+			newNodeWithReadyAndOrder("node-4", "v0", "v0", 5, corev1.ConditionTrue),
+			newNodeWithReadyAndOrder("node-5", "v0", "v0", 10, corev1.ConditionTrue),
+			newNodeWithReady("node-6", "v0", "v0", corev1.ConditionTrue),
+			newNodeWithReady("node-7", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-8", "v1", "v1", corev1.ConditionTrue),
+		},
+		expected:        []string{"node-5", "node-4"},
+		otherCandidates: []string{"node-6"},
+		capacity:        2,
 	}}
 
 	for idx, test := range tests {
diff --git a/pkg/controller/node/status_test.go b/pkg/controller/node/status_test.go
index a4eab59fd7..f25226875f 100644
--- a/pkg/controller/node/status_test.go
+++ b/pkg/controller/node/status_test.go
@@ -108,6 +108,16 @@ func newNodeWithReady(name string, currentConfig, desiredConfig string, status c
 	return node
 }
 
+func newNodeWithReadyAndOrder(name string, currentConfig, desiredConfig string, order int, status corev1.ConditionStatus) *corev1.Node {
+	node := newNode(name, currentConfig, desiredConfig)
+	node.Status = corev1.NodeStatus{Conditions: []corev1.NodeCondition{{Type: corev1.NodeReady, Status: status}}}
+	if node.Annotations == nil {
+		node.Annotations = map[string]string{}
+	}
+	node.Annotations[daemonconsts.MachineUpdateOrderingAnnotationKey] = fmt.Sprintf("%d", order)
+	return node
+}
+
 func newNodeWithReadyAndDaemonState(name string, currentConfig, desiredConfig string, status corev1.ConditionStatus, dstate string) *corev1.Node {
 	node := newNode(name, currentConfig, desiredConfig)
 	node.Status = corev1.NodeStatus{Conditions: []corev1.NodeCondition{{Type: corev1.NodeReady, Status: status}}}
diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go
index 746051be92..fb1f5d2560 100644
--- a/pkg/daemon/constants/constants.go
+++ b/pkg/daemon/constants/constants.go
@@ -14,6 +14,8 @@ const (
 	DesiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
 	// MachineConfigDaemonStateAnnotationKey is used to fetch the state of the daemon on the machine.
 	MachineConfigDaemonStateAnnotationKey = "machineconfiguration.openshift.io/state"
+	// MachineUpdateOrderingAnnotationKey is used to specify the desired ordering for an update
+	MachineUpdateOrderingAnnotationKey = "machineconfiguration.openshift.io/update-order"
 	// OpenShiftOperatorManagedLabel is used to filter out kube objects that don't need to be synced by the MCO
 	OpenShiftOperatorManagedLabel = "openshift.io/operator-managed"
 	// MachineConfigDaemonStateWorking is set by daemon when it is applying an update.
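
To illustrate the ordering rule above, a minimal standalone sketch
(plain strings stand in for *corev1.Node; the names and values mirror
the mixed-order test case). Nodes with no entry in the order map
compare as 0, so annotated candidates sort ahead of unannotated ones,
and higher values win:

	package main

	import (
		"fmt"
		"sort"
	)

	func main() {
		// node-6 carries no update-order annotation, so it has no map
		// entry and compares as 0, sorting after the annotated nodes.
		nodes := []string{"node-6", "node-4", "node-5"}
		order := map[string]int{"node-4": 5, "node-5": 10}

		// Same comparison as nodeOrdering.Less: higher values sort first.
		sort.SliceStable(nodes, func(i, j int) bool {
			return order[nodes[i]] > order[nodes[j]]
		})

		fmt.Println(nodes) // [node-5 node-4 node-6]
	}

Note the distinction: an explicit annotation of "0" removes a node from
the candidate list entirely, while an unannotated node is still updated,
just after all annotated candidates.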