From c65273eeb1285c3e5eaed7ea599abafb22f87403 Mon Sep 17 00:00:00 2001 From: mittalrishabh Date: Sun, 1 Dec 2024 23:37:11 -0800 Subject: [PATCH] retry as stale read on replica (#1509) Signed-off-by: rishabh_mittal --- internal/locate/region_request_state_test.go | 14 ++--- internal/locate/replica_selector.go | 35 ++++++++--- internal/locate/replica_selector_test.go | 63 ++++++++++++-------- 3 files changed, 70 insertions(+), 42 deletions(-) diff --git a/internal/locate/region_request_state_test.go b/internal/locate/region_request_state_test.go index 022fb13a6..e903cb4b7 100644 --- a/internal/locate/region_request_state_test.go +++ b/internal/locate/region_request_state_test.go @@ -367,7 +367,7 @@ func TestRegionCacheStaleRead(t *testing.T) { leaderRegionValid: true, leaderAsyncReload: util.Some(false), leaderSuccessReplica: []string{"z2", "z3"}, - leaderSuccessReadType: SuccessFollowerRead, + leaderSuccessReadType: SuccessStaleRead, followerRegionValid: true, followerAsyncReload: util.Some(false), followerSuccessReplica: []string{"z2"}, @@ -392,11 +392,11 @@ func TestRegionCacheStaleRead(t *testing.T) { leaderRegionValid: true, leaderAsyncReload: util.Some(false), leaderSuccessReplica: []string{"z3"}, - leaderSuccessReadType: SuccessFollowerRead, + leaderSuccessReadType: SuccessStaleRead, followerRegionValid: true, followerAsyncReload: util.Some(false), followerSuccessReplica: []string{"z3"}, - followerSuccessReadType: SuccessFollowerRead, + followerSuccessReadType: SuccessStaleRead, }, { do: leaderServerIsBusy, @@ -405,11 +405,11 @@ func TestRegionCacheStaleRead(t *testing.T) { leaderRegionValid: true, leaderAsyncReload: util.Some(false), leaderSuccessReplica: []string{"z2", "z3"}, - leaderSuccessReadType: SuccessFollowerRead, + leaderSuccessReadType: SuccessStaleRead, followerRegionValid: true, followerAsyncReload: util.Some(false), followerSuccessReplica: []string{"z2", "z3"}, - followerSuccessReadType: SuccessFollowerRead, + followerSuccessReadType: SuccessStaleRead, }, { do: leaderServerIsBusy, @@ -418,11 +418,11 @@ func TestRegionCacheStaleRead(t *testing.T) { leaderRegionValid: true, leaderAsyncReload: util.Some(false), leaderSuccessReplica: []string{"z3"}, - leaderSuccessReadType: SuccessFollowerRead, + leaderSuccessReadType: SuccessStaleRead, followerRegionValid: true, followerAsyncReload: util.Some(false), followerSuccessReplica: []string{"z3"}, - followerSuccessReadType: SuccessFollowerRead, + followerSuccessReadType: SuccessStaleRead, }, { do: leaderDown, diff --git a/internal/locate/replica_selector.go b/internal/locate/replica_selector.go index c9064549b..901592ca3 100644 --- a/internal/locate/replica_selector.go +++ b/internal/locate/replica_selector.go @@ -182,17 +182,21 @@ func (s *replicaSelector) nextForReplicaReadMixed(req *tikvrpc.Request) { } s.target = strategy.next(s) if s.target != nil { - if s.isStaleRead && s.attempts == 1 { - // stale-read request first access. - if !s.target.store.IsLabelsMatch(s.option.labels) && s.target.peer.Id != s.region.GetLeaderPeerID() { - // If the target replica's labels is not match and not leader, use replica read. - // This is for compatible with old version. - req.StaleRead = false - req.ReplicaRead = true - } else { - // use stale read. + if s.isStaleRead { + isStaleRead := true + if s.attempts != 1 || (!s.target.store.IsLabelsMatch(s.option.labels) && s.target.peer.Id != s.region.GetLeaderPeerID()) { + // retry or target replica's labels does not match and not leader + if strategy.canSendReplicaRead(s) { + // use replica read. + isStaleRead = false + } + } + if isStaleRead { req.StaleRead = true req.ReplicaRead = false + } else { + req.StaleRead = false + req.ReplicaRead = true } } else { // always use replica. @@ -300,6 +304,16 @@ func (s *ReplicaSelectMixedStrategy) next(selector *replicaSelector) *replica { return nil } +func (s *ReplicaSelectMixedStrategy) canSendReplicaRead(selector *replicaSelector) bool { + replicas := selector.replicas + replica := replicas[s.leaderIdx] + if replica.hasFlag(deadlineErrUsingConfTimeoutFlag) || replica.hasFlag(serverIsBusyFlag) { + // don't overwhelm the leader if it is busy + return false + } + return true +} + func hasDeadlineExceededError(replicas []*replica) bool { for _, replica := range replicas { if replica.hasFlag(deadlineErrUsingConfTimeoutFlag) { @@ -529,6 +543,9 @@ func (s *replicaSelector) onServerIsBusy( backoffErr := errors.Errorf("server is busy, ctx: %v", ctx) if s.canFastRetry() { s.addPendingBackoff(store, retry.BoTiKVServerBusy, backoffErr) + if s.target != nil { + s.target.addFlag(serverIsBusyFlag) + } return true, nil } err = bo.Backoff(retry.BoTiKVServerBusy, backoffErr) diff --git a/internal/locate/replica_selector_test.go b/internal/locate/replica_selector_test.go index e7af35f3a..d928b0a02 100644 --- a/internal/locate/replica_selector_test.go +++ b/internal/locate/replica_selector_test.go @@ -362,7 +362,7 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", - "{addr: store2, replica-read: true, stale-read: false}"}, + "{addr: store2, replica-read: false, stale-read: true}"}, respErr: "", respRegionError: nil, backoffCnt: 0, @@ -404,7 +404,7 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}"}, // store2 has DeadLineExceededErr, so don't retry store2 even it is new leader. + "{addr: store3, replica-read: false, stale-read: true}"}, // store2 has DeadLineExceededErr, so don't retry store2 even it is new leader. respErr: "", respRegionError: nil, backoffCnt: 0, @@ -504,8 +504,8 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { expect: &accessPathResult{ accessPath: []string{ "{addr: store1, replica-read: false, stale-read: true}", - "{addr: store2, replica-read: true, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}"}, + "{addr: store2, replica-read: false, stale-read: true}", + "{addr: store3, replica-read: false, stale-read: true}"}, respErr: "", respRegionError: fakeEpochNotMatch, backoffCnt: 1, @@ -548,7 +548,7 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { accessPath: []string{ "{addr: store3, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", - "{addr: store2, replica-read: true, stale-read: false}"}, + "{addr: store2, replica-read: false, stale-read: true}"}, respErr: "", respRegionError: nil, backoffCnt: 0, @@ -568,8 +568,8 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", // try leader with leader read. - "{addr: store2, replica-read: true, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}", + "{addr: store2, replica-read: false, stale-read: true}", + "{addr: store3, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: fakeEpochNotMatch, @@ -590,8 +590,8 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { accessPath: []string{ "{addr: store1, replica-read: false, stale-read: true}", "{addr: store2, replica-read: false, stale-read: false}", // try leader with leader read. - "{addr: store3, replica-read: true, stale-read: false}", - "{addr: store1, replica-read: true, stale-read: false}", + "{addr: store3, replica-read: false, stale-read: true}", + "{addr: store1, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: nil, @@ -611,8 +611,8 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { accessPath: []string{ "{addr: store1, replica-read: false, stale-read: true}", "{addr: store2, replica-read: false, stale-read: false}", // try leader with leader read. - "{addr: store3, replica-read: true, stale-read: false}", - "{addr: store1, replica-read: true, stale-read: false}", + "{addr: store3, replica-read: false, stale-read: true}", + "{addr: store1, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: fakeEpochNotMatch, @@ -654,8 +654,8 @@ func TestReplicaReadAccessPathByCase(t *testing.T) { expect: &accessPathResult{ accessPath: []string{ "{addr: store1, replica-read: false, stale-read: true}", - "{addr: store2, replica-read: true, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}", + "{addr: store2, replica-read: false, stale-read: true}", + "{addr: store3, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: nil, @@ -920,7 +920,7 @@ func TestReplicaReadAccessPathByCase2(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}"}, + "{addr: store3, replica-read: false, stale-read: true}"}, respErr: "", respRegionError: nil, backoffCnt: 0, @@ -1052,9 +1052,16 @@ func TestReplicaReadAccessPathByBasicCase(t *testing.T) { backoff = []string{} } if staleRead { - accessPath = []string{ - "{addr: store1, replica-read: false, stale-read: true}", - "{addr: store2, replica-read: true, stale-read: false}", + if tp == ServerIsBusyErr || tp == ServerIsBusyWithEstimatedWaitMsErr { + accessPath = []string{ + "{addr: store1, replica-read: false, stale-read: true}", + "{addr: store2, replica-read: false, stale-read: true}", + } + } else { + accessPath = []string{ + "{addr: store1, replica-read: false, stale-read: true}", + "{addr: store2, replica-read: true, stale-read: false}", + } } } default: @@ -1918,8 +1925,8 @@ func TestReplicaReadAccessPathByStaleReadCase(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", // try leader with leader read. - "{addr: store2, replica-read: true, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}", + "{addr: store2, replica-read: false, stale-read: true}", + "{addr: store3, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: fakeEpochNotMatch, @@ -1940,8 +1947,8 @@ func TestReplicaReadAccessPathByStaleReadCase(t *testing.T) { accessPath: []string{ "{addr: store1, replica-read: false, stale-read: true}", "{addr: store2, replica-read: false, stale-read: false}", // try leader with leader read. - "{addr: store3, replica-read: true, stale-read: false}", - "{addr: store1, replica-read: true, stale-read: false}", + "{addr: store3, replica-read: false, stale-read: true}", + "{addr: store1, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: fakeEpochNotMatch, @@ -2050,8 +2057,8 @@ func TestReplicaReadAccessPathByStaleReadCase(t *testing.T) { expect: &accessPathResult{ accessPath: []string{ "{addr: store1, replica-read: false, stale-read: true}", - "{addr: store2, replica-read: true, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}", + "{addr: store2, replica-read: false, stale-read: true}", + "{addr: store3, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: nil, @@ -2073,7 +2080,7 @@ func TestReplicaReadAccessPathByStaleReadCase(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}", + "{addr: store3, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: nil, @@ -2095,7 +2102,7 @@ func TestReplicaReadAccessPathByStaleReadCase(t *testing.T) { accessPath: []string{ "{addr: store2, replica-read: false, stale-read: true}", "{addr: store1, replica-read: false, stale-read: false}", - "{addr: store3, replica-read: true, stale-read: false}", + "{addr: store3, replica-read: false, stale-read: true}", }, respErr: "", respRegionError: nil, @@ -2666,7 +2673,11 @@ func TestReplicaReadAvoidSlowStore(t *testing.T) { if expectedFirstStore == 3 { // Retry on store 2 which is a follower. // Stale-read mode falls back to replica-read mode. - expectedSecondPath = "{addr: store2, replica-read: true, stale-read: false}" + if staleRead { + expectedSecondPath = "{addr: store2, replica-read: false, stale-read: true}" + } else { + expectedSecondPath = "{addr: store2, replica-read: true, stale-read: false}" + } } else { if staleRead { // Retry in leader read mode