diff --git a/pkg/manager/member/pd_upgrader.go b/pkg/manager/member/pd_upgrader.go
index a7327f25053..5b82096de47 100644
--- a/pkg/manager/member/pd_upgrader.go
+++ b/pkg/manager/member/pd_upgrader.go
@@ -15,6 +15,7 @@ package member

 import (
 	"fmt"
+	"strconv"

 	"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
 	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
@@ -24,12 +25,18 @@ import (
 	"github.com/pingcap/tidb-operator/pkg/third_party/k8s"

 	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/klog/v2"
 )

 const (
 	// set this PD cluster annotation to true to fail the cluster upgrade if PD loses quorum during one pod restart
 	annoKeyPDPeersCheck = "tidb.pingcap.com/pd-check-quorum-before-upgrade"
+
+	// TODO: change to use minReadySeconds in sts spec
+	// See https://kubernetes.io/blog/2021/08/27/minreadyseconds-statefulsets/
+	annoKeyPDMinReadySeconds = "tidb.pingcap.com/pd-min-ready-seconds"
 )

 type pdUpgrader struct {
@@ -79,6 +86,17 @@ func (u *pdUpgrader) gracefulUpgrade(tc *v1alpha1.TidbCluster, oldSet *apps.Stat
 		return nil
 	}

+	minReadySeconds := 0
+	s, ok := tc.Annotations[annoKeyPDMinReadySeconds]
+	if ok {
+		i, err := strconv.Atoi(s)
+		if err != nil {
+			klog.Warningf("tidbcluster: [%s/%s] annotation %s should be an integer: %v", ns, tcName, annoKeyPDMinReadySeconds, err)
+		} else {
+			minReadySeconds = i
+		}
+	}
+
 	mngerutils.SetUpgradePartition(newSet, *oldSet.Spec.UpdateStrategy.RollingUpdate.Partition)
 	podOrdinals := helper.GetPodOrdinals(*oldSet.Spec.Replicas, oldSet).List()
 	for _i := len(podOrdinals) - 1; _i >= 0; _i-- {
@@ -95,8 +113,12 @@
 		}

 		if revision == tc.Status.PD.StatefulSet.UpdateRevision {
-			if !k8s.IsPodReady(pod) {
-				return controller.RequeueErrorf("tidbcluster: [%s/%s]'s upgraded pd pod: [%s] is not ready", ns, tcName, podName)
+			if !k8s.IsPodAvailable(pod, int32(minReadySeconds), metav1.Now()) {
+				readyCond := k8s.GetPodReadyCondition(pod.Status)
+				if readyCond == nil || readyCond.Status != corev1.ConditionTrue {
+					return controller.RequeueErrorf("tidbcluster: [%s/%s]'s upgraded pd pod: [%s] is not ready", ns, tcName, podName)
+				}
+				return controller.RequeueErrorf("tidbcluster: [%s/%s]'s upgraded pd pod: [%s] is not available, last transition time is %v", ns, tcName, podName, readyCond.LastTransitionTime)
 			}
 			if member, exist := tc.Status.PD.Members[PdName(tc.Name, i, tc.Namespace, tc.Spec.ClusterDomain, tc.Spec.AcrossK8s)]; !exist || !member.Health {
 				return controller.RequeueErrorf("tidbcluster: [%s/%s]'s pd upgraded pod: [%s] is not health", ns, tcName, podName)
diff --git a/pkg/manager/member/pd_upgrader_test.go b/pkg/manager/member/pd_upgrader_test.go
index f9b80a6c7d7..e0270d60ecc 100644
--- a/pkg/manager/member/pd_upgrader_test.go
+++ b/pkg/manager/member/pd_upgrader_test.go
@@ -314,6 +314,31 @@ func TestPDUpgraderUpgrade(t *testing.T) {
 				g.Expect(newSet.Spec.UpdateStrategy.RollingUpdate.Partition).To(Equal(pointer.Int32Ptr(1)))
 			},
 		},
+		{
+			name: "upgraded pod is ready but not available",
+			changeFn: func(tc *v1alpha1.TidbCluster) {
+				tc.Status.PD.Synced = true
+				if tc.Annotations == nil {
+					tc.Annotations = map[string]string{}
+				}
+				// 5min is long enough that the pod stays unavailable for the whole unit test
+				tc.Annotations[annoKeyPDMinReadySeconds] = "300"
+			},
+			changePods: func(pods []*corev1.Pod) {
+				pods[1].Status.Conditions[0].LastTransitionTime = metav1.Now()
+			},
+			changeOldSet:      nil,
+			transferLeaderErr: false,
+			pdPeersAreUnstable: true,
+			errExpectFn: func(g *GomegaWithT, err error) {
+				g.Expect(err).To(HaveOccurred())
+				g.Expect(err.Error()).To(ContainSubstring("is not available"))
+			},
+			expectFn: func(g *GomegaWithT, tc *v1alpha1.TidbCluster, newSet *apps.StatefulSet) {
+				g.Expect(tc.Status.PD.Phase).To(Equal(v1alpha1.UpgradePhase))
+				g.Expect(newSet.Spec.UpdateStrategy.RollingUpdate.Partition).To(Equal(pointer.Int32Ptr(1)))
+			},
+		},
 	}

 	for i := range tests {
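Reviewer note: the behavior change above hinges on the difference between *ready* and *available*. The sketch below is illustration only, not the patched code: `isPodAvailable` and `getPodReadyCondition` re-implement, under the assumption that the helpers vendored in `pkg/third_party/k8s` match the upstream Kubernetes pod utilities, the rule the upgrader now enforces before moving the StatefulSet partition to the next PD pod.

```go
package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// getPodReadyCondition returns the pod's Ready condition, or nil if it has none.
func getPodReadyCondition(status corev1.PodStatus) *corev1.PodCondition {
	for i := range status.Conditions {
		if status.Conditions[i].Type == corev1.PodReady {
			return &status.Conditions[i]
		}
	}
	return nil
}

// isPodAvailable reports whether the pod has been Ready for at least
// minReadySeconds at the given time: the Ready condition is True and its
// LastTransitionTime is at least minReadySeconds in the past.
func isPodAvailable(pod *corev1.Pod, minReadySeconds int32, now metav1.Time) bool {
	c := getPodReadyCondition(pod.Status)
	if c == nil || c.Status != corev1.ConditionTrue {
		return false // not ready at all -> the "is not ready" branch in the upgrader
	}
	if minReadySeconds == 0 {
		return true // annotation unset: identical to the old IsPodReady gate
	}
	minReady := time.Duration(minReadySeconds) * time.Second
	// Ready, but only available once the min-ready window has elapsed.
	return !c.LastTransitionTime.IsZero() && c.LastTransitionTime.Add(minReady).Before(now.Time)
}

func main() {
	pod := &corev1.Pod{
		Status: corev1.PodStatus{
			Conditions: []corev1.PodCondition{{
				Type:               corev1.PodReady,
				Status:             corev1.ConditionTrue,
				LastTransitionTime: metav1.Now(), // just became ready
			}},
		},
	}
	fmt.Println(isPodAvailable(pod, 300, metav1.Now())) // false: ready but not yet available
	fmt.Println(isPodAvailable(pod, 0, metav1.Now()))   // true: plain readiness
}
```

With `tidb.pingcap.com/pd-min-ready-seconds` unset, `minReadySeconds` stays 0 and the gate degenerates to the old `IsPodReady` check, so existing clusters see no behavior change. When the annotation is set, the upgrader keeps returning a requeue error until the window elapses, which is why the test case expects an "is not available" error rather than a completed upgrade step.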