From 96dc403cf533d904e514d4ed6bfd59df97c28271 Mon Sep 17 00:00:00 2001 From: Tero Saarni Date: Wed, 23 Oct 2024 06:28:46 +0300 Subject: [PATCH] Poll synctracker HasSynced() during startup (#6614) The "upstream" informer trackers do not always return HasSynced() == true immediately, even if we have already processed all events the informer dispatched to us. Change to polling so that we can also make progress in that case. Signed-off-by: Tero Saarni --- changelogs/unreleased/6614-tsaarni-small.md | 1 + internal/contour/handler.go | 30 ++++++++++----------- 2 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 changelogs/unreleased/6614-tsaarni-small.md diff --git a/changelogs/unreleased/6614-tsaarni-small.md b/changelogs/unreleased/6614-tsaarni-small.md new file mode 100644 index 00000000000..1eac9f3117a --- /dev/null +++ b/changelogs/unreleased/6614-tsaarni-small.md @@ -0,0 +1 @@ +Fixed a bug where a follower Contour instance occasionally got stuck in a non-ready state when using the `--watch-namespaces` flag. diff --git a/internal/contour/handler.go b/internal/contour/handler.go index a66d590f158..69c2077afdd 100644 --- a/internal/contour/handler.go +++ b/internal/contour/handler.go @@ -146,6 +146,15 @@ func (e *EventHandler) Start(ctx context.Context) error { // run to allow the holdoff timer to batch the updates from // the API informers. lastDAGRebuild = time.Now() + + // initialSyncPollPeriod defines the duration to wait between polling attempts during the initial informer synchronization. + initialSyncPollPeriod = 100 * time.Millisecond + + // initialSyncPollTicker is the ticker that will trigger the periodic polling. + initialSyncPollTicker = time.NewTicker(initialSyncPollPeriod) + + // initialSyncPoll is the channel that will receive a signal when to poll the initial informer synchronization status. 
+ initialSyncPoll = initialSyncPollTicker.C ) reset := func() (v int) { @@ -153,12 +162,6 @@ func (e *EventHandler) Start(ctx context.Context) error { return } - // It may be that there are no resources at all to process in watched namespaces. - // Initial (empty) DAG build is not needed and we can mark it as built immediately to allow the XDS server to start. - if e.syncTracker.HasSynced() { - e.initialDagBuilt.Store(true) - } - for { // In the main loop one of four things can happen. // 1. We're waiting for an event on op, stop, or pending, noting that @@ -196,12 +199,6 @@ func (e *EventHandler) Start(ctx context.Context) error { if updateOpAdd, ok := op.(opAdd); ok { if updateOpAdd.isInInitialList { e.syncTracker.Finished() - - // If this was the last event in the initial list but none of the events triggered DAG rebuild, - // then we can mark the (empty) DAG as built to allow the XDS server to start. - if e.syncTracker.HasSynced() && timer == nil { - e.initialDagBuilt.Store(true) - } } } case <-pending: @@ -220,9 +217,6 @@ func (e *EventHandler) Start(ctx context.Context) error { latestDAG := e.builder.Build() e.observer.OnChange(latestDAG) - // Allow XDS server to start (if it hasn't already). - e.initialDagBuilt.Store(true) - // Update the status on objects. for _, upd := range latestDAG.StatusCache.GetStatusUpdates() { e.statusUpdater.Send(upd) @@ -230,6 +224,12 @@ func (e *EventHandler) Start(ctx context.Context) error { e.incSequence() lastDAGRebuild = time.Now() + case <-initialSyncPoll: + if e.syncTracker.HasSynced() { + // Informer caches are synced, stop the polling and allow xDS server to start. + initialSyncPollTicker.Stop() + e.initialDagBuilt.Store(true) + } case <-ctx.Done(): // shutdown return nil