diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go
index a8c627c815..fdc3fcfe3b 100644
--- a/pkg/controller/node/node_controller.go
+++ b/pkg/controller/node/node_controller.go
@@ -467,6 +467,7 @@ func (ctrl *Controller) updateNode(old, cur interface{}) {
 		daemonconsts.CurrentMachineConfigAnnotationKey,
 		daemonconsts.DesiredMachineConfigAnnotationKey,
 		daemonconsts.MachineConfigDaemonStateAnnotationKey,
+		daemonconsts.MachineUpdateHoldAnnotationKey,
 	}
 	for _, anno := range annos {
 		newValue := curNode.Annotations[anno]
@@ -758,10 +759,10 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
 		return err
 	}

-	candidates, capacity := getAllCandidateMachines(pool, nodes, maxunavail)
-	if len(candidates) > 0 {
-		ctrl.logPool(pool, "%d candidate nodes for update, capacity: %d", len(candidates), capacity)
-		if err := ctrl.updateCandidateMachines(pool, candidates, capacity); err != nil {
+	state := getAllCandidateMachines(pool, nodes, maxunavail)
+	if len(state.candidates) > 0 {
+		ctrl.logPool(pool, "%d candidate nodes for update, capacity: %d", len(state.candidates), state.capacity)
+		if err := ctrl.updateCandidateMachines(pool, state.candidates, state.capacity); err != nil {
 			if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil {
 				return goerrs.Wrapf(err, "error setting desired machine config annotation for pool %q, sync error: %v", pool.Name, syncErr)
 			}
@@ -833,20 +834,21 @@ func (ctrl *Controller) setDesiredMachineConfigAnnotation(nodeName, currentConfi
 	})
 }

+type updateCandidateState struct {
+	candidates []*corev1.Node
+	unavail    uint
+	held       uint
+	capacity   uint
+}
+
 // getAllCandidateMachines returns all possible nodes which can be updated to the target config, along with a maximum
 // capacity. It is the reponsibility of the caller to choose a subset of the nodes given the capacity.
-func getAllCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node, maxUnavailable int) ([]*corev1.Node, uint) {
+func getAllCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node, maxUnavailable int) updateCandidateState {
 	targetConfig := pool.Spec.Configuration.Name
-
-	unavail := getUnavailableMachines(nodesInPool)
-	// If we're at capacity, there's nothing to do.
-	if len(unavail) >= maxUnavailable {
-		return nil, 0
-	}
-	capacity := maxUnavailable - len(unavail)
-	failingThisConfig := 0
+	var failingThisConfig uint
+	ret := updateCandidateState{}
 	// We only look at nodes which aren't already targeting our desired config
-	var nodes []*corev1.Node
+	var candidates []*corev1.Node
 	for _, node := range nodesInPool {
 		if node.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] == targetConfig {
 			if isNodeMCDFailing(node) {
@@ -855,69 +857,47 @@ func getAllCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*core
 			continue
 		}

-		nodes = append(nodes, node)
+		if _, ok := node.Annotations[daemonconsts.MachineUpdateHoldAnnotationKey]; ok {
+			ret.held++
+			continue
+		}
+
+		candidates = append(candidates, node)
+	}
+
+	ret.unavail = uint(len(getUnavailableMachines(nodesInPool)))
+	// If we're at capacity, there's nothing to do.
+	if ret.unavail >= uint(maxUnavailable) {
+		ret.capacity = 0
+		return ret
 	}
+	ret.capacity = uint(maxUnavailable) - ret.unavail

 	// Nodes which are failing to target this config also count against
 	// availability - it might be a transient issue, and if the issue
 	// clears we don't want multiple to update at once.
-	if failingThisConfig >= capacity {
-		return nil, 0
+	if failingThisConfig >= ret.capacity {
+		ret.capacity = 0
+		return ret
 	}
-	capacity -= failingThisConfig
+	ret.capacity -= failingThisConfig
+	ret.candidates = candidates

-	return nodes, uint(capacity)
+	return ret
 }

 // getCandidateMachines returns the maximum subset of nodes which can be updated to the target config given availability constraints.
 func getCandidateMachines(pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node, maxUnavailable int) []*corev1.Node {
-	nodes, capacity := getAllCandidateMachines(pool, nodesInPool, maxUnavailable)
-	if uint(len(nodes)) < capacity {
-		return nodes
+	state := getAllCandidateMachines(pool, nodesInPool, maxUnavailable)
+	fmt.Printf("%+v\n", state)
+	if uint(len(state.candidates)) < state.capacity {
+		return state.candidates
 	}
-	return nodes[:capacity]
-}
-
-// getCurrentEtcdLeader is not yet implemented
-func (ctrl *Controller) getCurrentEtcdLeader(candidates []*corev1.Node) (*corev1.Node, error) {
-	return nil, nil
-}
-
-// filterControlPlaneCandidateNodes adjusts the candidates and capacity specifically
-// for the control plane, e.g. based on which node is the etcd leader at the time.
-// nolint:unparam
-func (ctrl *Controller) filterControlPlaneCandidateNodes(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) ([]*corev1.Node, uint, error) {
-	if len(candidates) <= 1 {
-		return candidates, capacity, nil
-	}
-	etcdLeader, err := ctrl.getCurrentEtcdLeader(candidates)
-	if err != nil {
-		glog.Warningf("Failed to find current etcd leader (continuing anyways): %v", err)
-	}
-	var newCandidates []*corev1.Node
-	for _, node := range candidates {
-		if node == etcdLeader {
-			// For now make this an event so we know it's working, even though it's more of a non-event
-			ctrl.eventRecorder.Eventf(pool, corev1.EventTypeNormal, "DeferringEtcdLeaderUpdate", "Deferring update of etcd leader %s", node.Name)
-			glog.Infof("Deferring update of etcd leader: %s", node.Name)
-			continue
-		}
-		newCandidates = append(newCandidates, node)
-	}
-	return newCandidates, capacity, nil
+	return state.candidates[:state.capacity]
 }

 // updateCandidateMachines sets the desiredConfig annotation the candidate machines
 func (ctrl *Controller) updateCandidateMachines(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) error {
-	if pool.Name == masterPoolName {
-		var err error
-		candidates, capacity, err = ctrl.filterControlPlaneCandidateNodes(pool, candidates, capacity)
-		if err != nil {
-			return err
-		}
-		// In practice right now these counts will be 1 but let's stay general to support 5 etcd nodes in the future
-		ctrl.logPool(pool, "filtered to %d candidate nodes for update, capacity: %d", len(candidates), capacity)
-	}
 	if capacity < uint(len(candidates)) {
 		// Arbitrarily pick the first N candidates; no attempt at sorting.
 		// Perhaps later we allow admins to weight somehow, or do something more intelligent.
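Note on the candidate loop above: a node counts as held when the machineconfiguration.openshift.io/hold annotation is merely present; its value is never inspected, and held nodes are skipped before capacity is computed, showing up only in the held counter. A minimal standalone sketch of that check follows; the constant is copied locally and the isNodeHeld helper name is invented for illustration (the diff itself inlines the lookup):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Copied here for illustration; the MCO defines this key in pkg/daemon/constants.
const machineUpdateHoldAnnotationKey = "machineconfiguration.openshift.io/hold"

// isNodeHeld reports whether a node carries the hold annotation. The candidate
// loop above performs this check inline and skips held nodes entirely, so they
// are never selected as update candidates (they are only tallied in state.held).
func isNodeHeld(node *corev1.Node) bool {
	_, held := node.Annotations[machineUpdateHoldAnnotationKey]
	return held
}

func main() {
	held := &corev1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:        "node-3",
		Annotations: map[string]string{machineUpdateHoldAnnotationKey: "true"},
	}}
	free := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-4"}}
	fmt.Println(isNodeHeld(held), isNodeHeld(free)) // prints: true false
}

Because the hold check happens before unavailability is tallied over the whole pool, a held node that is also unavailable still counts against maxUnavailable and therefore still reduces capacity.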
diff --git a/pkg/controller/node/node_controller_test.go b/pkg/controller/node/node_controller_test.go
index 353dacc243..226c05106c 100644
--- a/pkg/controller/node/node_controller_test.go
+++ b/pkg/controller/node/node_controller_test.go
@@ -503,6 +503,7 @@ func TestMaxUnavailable(t *testing.T) {

 func TestGetCandidateMachines(t *testing.T) {
 	tests := []struct {
+		desc string
 		nodes []*corev1.Node
 		progress int

@@ -512,7 +513,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		// capacity is the maximum number of nodes we could update
 		capacity uint
 	}{{
-		//no progress
+		desc: "no progress",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -523,7 +524,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 1,
 	}, {
-		//no progress
+		desc: "no progress 2",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -534,7 +535,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 0,
 	}, {
-		//no progress because we have an unavailable node
+		desc: "no progress because we have an unavailable node",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -545,7 +546,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 0,
 	}, {
-		// node-2 is going to change config, so we can only progress one more
+		desc: "node-2 is going to change config, so we can only progress one more",
 		progress: 3,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -558,7 +559,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: []string{"node-4"},
 		capacity: 1,
 	}, {
-		// We have a node working, don't start anything else
+		desc: "We have a node working, don't start anything else",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -571,7 +572,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 0,
 	}, {
-		//progress on old stuck node
+		desc: "progress on old stuck node",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -582,7 +583,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: []string{"node-2"},
 		capacity: 1,
 	}, {
-		// Don't change a degraded node to same config, but also don't start another
+		desc: "Don't change a degraded node to same config, but also don't start another",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -593,7 +594,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 0,
 	}, {
-		// Must be able to roll back
+		desc: "Must be able to roll back",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -605,7 +606,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 1,
 	}, {
-		// Validate we also don't affect nodes which haven't started work
+		desc: "Validate we also don't affect nodes which haven't started work",
 		progress: 1,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -616,7 +617,7 @@ func TestGetCandidateMachines(t *testing.T) {
 		otherCandidates: nil,
 		capacity: 0,
 	}, {
-		// A test with more nodes in mixed order
+		desc: "A test with more nodes in mixed order",
 		progress: 4,
 		nodes: []*corev1.Node{
 			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
@@ -632,10 +633,28 @@ func TestGetCandidateMachines(t *testing.T) {
 		expected: []string{"node-3", "node-4"},
 		otherCandidates: []string{"node-5", "node-6"},
 		capacity: 2,
+	}, {
+		desc: "A test with more nodes in mixed order, some held",
+		progress: 4,
+		nodes: []*corev1.Node{
+			newNodeWithReady("node-0", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-1", "v1", "v1", corev1.ConditionFalse),
+			newNodeWithReady("node-2", "v0", "v1", corev1.ConditionTrue),
+			newNodeWithReadyButHeld("node-3", "v0", "v0", corev1.ConditionTrue),
+			newNodeWithReady("node-4", "v0", "v0", corev1.ConditionTrue),
+			newNodeWithReadyButHeld("node-5", "v0", "v0", corev1.ConditionTrue),
+			newNodeWithReady("node-6", "v0", "v0", corev1.ConditionTrue),
+			newNodeWithReady("node-7", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-8", "v1", "v1", corev1.ConditionTrue),
+			newNodeWithReady("node-9", "v0", "v0", corev1.ConditionTrue),
+		},
+		expected: []string{"node-4", "node-6"},
+		otherCandidates: []string{"node-9"},
+		capacity: 2,
 	}}

 	for idx, test := range tests {
-		t.Run(fmt.Sprintf("case#%d", idx), func(t *testing.T) {
+		t.Run(fmt.Sprintf("case#%d: %s", idx, test.desc), func(t *testing.T) {
 			pool := &mcfgv1.MachineConfigPool{
 				Spec: mcfgv1.MachineConfigPoolSpec{
 					Configuration: mcfgv1.MachineConfigPoolStatusConfiguration{ObjectReference: corev1.ObjectReference{Name: "v1"}},
@@ -649,10 +668,10 @@ func TestGetCandidateMachines(t *testing.T) {
 			}
 			assert.Equal(t, test.expected, nodeNames)

-			allCandidates, capacity := getAllCandidateMachines(pool, test.nodes, test.progress)
-			assert.Equal(t, test.capacity, capacity)
+			state := getAllCandidateMachines(pool, test.nodes, test.progress)
+			assert.Equal(t, test.capacity, state.capacity)
 			var otherCandidates []string
-			for i, node := range allCandidates {
+			for i, node := range state.candidates {
 				if i < len(nodeNames) {
 					assert.Equal(t, node.Name, nodeNames[i])
 				} else {
diff --git a/pkg/controller/node/status_test.go b/pkg/controller/node/status_test.go
index a4eab59fd7..5aff9e5198 100644
--- a/pkg/controller/node/status_test.go
+++ b/pkg/controller/node/status_test.go
@@ -108,6 +108,16 @@ func newNodeWithReady(name string, currentConfig, desiredConfig string, status c
 	return node
 }

+func newNodeWithReadyButHeld(name string, currentConfig, desiredConfig string, status corev1.ConditionStatus) *corev1.Node {
+	node := newNode(name, currentConfig, desiredConfig)
+	node.Status = corev1.NodeStatus{Conditions: []corev1.NodeCondition{{Type: corev1.NodeReady, Status: status}}}
+	if node.Annotations == nil {
+		node.Annotations = map[string]string{}
+	}
+	node.Annotations[daemonconsts.MachineUpdateHoldAnnotationKey] = "true"
+	return node
+}
+
 func newNodeWithReadyAndDaemonState(name string, currentConfig, desiredConfig string, status corev1.ConditionStatus, dstate string) *corev1.Node {
 	node := newNode(name, currentConfig, desiredConfig)
 	node.Status = corev1.NodeStatus{Conditions: []corev1.NodeCondition{{Type: corev1.NodeReady, Status: status}}}
diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go
index 746051be92..aa68db032a 100644
--- a/pkg/daemon/constants/constants.go
+++ b/pkg/daemon/constants/constants.go
@@ -14,6 +14,8 @@ const (
 	DesiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
 	// MachineConfigDaemonStateAnnotationKey is used to fetch the state of the daemon on the machine.
 	MachineConfigDaemonStateAnnotationKey = "machineconfiguration.openshift.io/state"
+	// MachineUpdateHoldAnnotationKey is set on a node to exclude (hold) it from config updates
+	MachineUpdateHoldAnnotationKey = "machineconfiguration.openshift.io/hold"
 	// OpenShiftOperatorManagedLabel is used to filter out kube objects that don't need to be synced by the MCO
 	OpenShiftOperatorManagedLabel = "openshift.io/operator-managed"
 	// MachineConfigDaemonStateWorking is set by daemon when it is applying an update.
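For completeness, the workflow this constant implies: a node is held by putting machineconfiguration.openshift.io/hold on the Node object (any value works, only presence is checked) and released by deleting the annotation; something like oc annotate node/<name> machineconfiguration.openshift.io/hold=true, and machineconfiguration.openshift.io/hold- to remove it, would be the usual CLI route. The sketch below does the same with client-go; it assumes a context-aware client-go release and a reachable kubeconfig, and the holdNode helper name is ours rather than anything defined by this PR:

package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

const holdAnnotation = "machineconfiguration.openshift.io/hold"

// holdNode marks a node so the node controller leaves it out of the update
// candidate set; the value is arbitrary because only presence is checked.
func holdNode(ctx context.Context, client kubernetes.Interface, name string) error {
	node, err := client.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if node.Annotations == nil {
		node.Annotations = map[string]string{}
	}
	node.Annotations[holdAnnotation] = "true"
	_, err = client.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
	return err
}

func main() {
	// Load the default kubeconfig; in-cluster config would work the same way.
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}
	// Hold "node-3"; deleting the annotation again releases the node.
	if err := holdNode(context.Background(), client, "node-3"); err != nil {
		panic(err)
	}
}

Releasing a node is the mirror image: delete(node.Annotations, holdAnnotation) followed by another Update call.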