diff --git a/.github/workflows/compatibility.yml b/.github/workflows/compatibility.yml index e4745ee58..0097f7681 100644 --- a/.github/workflows/compatibility.yml +++ b/.github/workflows/compatibility.yml @@ -24,8 +24,8 @@ jobs: - name: Checkout TiDB uses: actions/checkout@v2 with: - repository: JmPotato/tidb - ref: update_pd_client + repository: crazycs520/tidb + ref: opt-stats path: tidb - name: Check build diff --git a/integration_tests/go.mod b/integration_tests/go.mod index 080200181..55cd65c91 100644 --- a/integration_tests/go.mod +++ b/integration_tests/go.mod @@ -11,7 +11,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.10.0 github.com/tidwall/gjson v1.14.1 - github.com/tikv/client-go/v2 v2.0.8-0.20241220061251-c5d92baf4928 + github.com/tikv/client-go/v2 v2.0.8-0.20241223070848-fd950fcf9fcc github.com/tikv/pd/client v0.0.0-20241220053006-461b86adc78d go.uber.org/goleak v1.3.0 ) @@ -114,7 +114,7 @@ require ( replace ( github.com/go-ldap/ldap/v3 => github.com/YangKeao/ldap/v3 v3.4.5-0.20230421065457-369a3bab1117 - github.com/pingcap/tidb => github.com/JmPotato/tidb v1.1.0-beta.0.20241223063748-1032fe27d1d9 + github.com/pingcap/tidb => github.com/crazycs520/tidb v1.1.0-beta.0.20241225031701-9554991d22f6 github.com/tikv/client-go/v2 => ../ ) diff --git a/integration_tests/go.sum b/integration_tests/go.sum index 5954937c9..f4d5d1aae 100644 --- a/integration_tests/go.sum +++ b/integration_tests/go.sum @@ -840,8 +840,6 @@ github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= -github.com/JmPotato/tidb v1.1.0-beta.0.20241223063748-1032fe27d1d9 h1:AjyLRfAHeNSwmYwgT+PgffG2VGxbRIVXgvh4Uqssr8I= -github.com/JmPotato/tidb v1.1.0-beta.0.20241223063748-1032fe27d1d9/go.mod h1:4yjTiTiYTvEYe8lI1p2JwYz6UayXzUw5S4IdtOyTAJc= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= @@ -948,6 +946,8 @@ github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03V github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/crazycs520/tidb v1.1.0-beta.0.20241225031701-9554991d22f6 h1:yz4Q18nLRc/8R4/zFsG6Lw/q/wT6Yn60QuOC6DpWDXc= +github.com/crazycs520/tidb v1.1.0-beta.0.20241225031701-9554991d22f6/go.mod h1:9slOs9kUMdP48FtRb7K/njHPSKcVNiJqWYVsCxtPvQE= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso= github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= diff --git a/integration_tests/snapshot_test.go b/integration_tests/snapshot_test.go index aee209543..3f0ef9716 100644 --- a/integration_tests/snapshot_test.go +++ b/integration_tests/snapshot_test.go @@ -347,6 +347,13 @@ func (s *testSnapshotSuite) TestSnapshotRuntimeStats() { "scan_detail: {total_process_keys: 20, total_process_keys_size: 20, total_keys: 30, get_snapshot_time: 1µs, " + "rocksdb: {delete_skipped_count: 10, key_skipped_count: 2, block: {cache_hit_count: 20, read_count: 40, read_byte: 30 Bytes}}}" s.Equal(expect, snapshot.FormatStats()) + snapshot.GetResolveLockDetail().ResolveLockTime = int64(time.Second) + expect = "Get:{num_rpc:4, total_time:2s},txnLockFast_backoff:{num:2, total_time:10ms}, " + + "time_detail: {total_process_time: 200ms, total_wait_time: 200ms}, " + + "resolve_lock_time:1s, " + + "scan_detail: {total_process_keys: 20, total_process_keys_size: 20, total_keys: 30, get_snapshot_time: 1µs, " + + "rocksdb: {delete_skipped_count: 10, key_skipped_count: 2, block: {cache_hit_count: 20, read_count: 40, read_byte: 30 Bytes}}}" + s.Equal(expect, snapshot.FormatStats()) } func (s *testSnapshotSuite) TestRCRead() { diff --git a/internal/locate/region_request.go b/internal/locate/region_request.go index b3e71de10..042694e8c 100644 --- a/internal/locate/region_request.go +++ b/internal/locate/region_request.go @@ -127,7 +127,8 @@ func (s *RegionRequestSender) String() string { // RegionRequestRuntimeStats records the runtime stats of send region requests. type RegionRequestRuntimeStats struct { - RPCStats map[tikvrpc.CmdType]*RPCRuntimeStats + // RPCStatsList uses to record RPC requests stats, since in most cases, only one kind of rpc request is sent at a time, use slice instead of map for performance. + RPCStatsList []RPCRuntimeStats RequestErrorStats } @@ -142,29 +143,47 @@ type RequestErrorStats struct { // NewRegionRequestRuntimeStats returns a new RegionRequestRuntimeStats. func NewRegionRequestRuntimeStats() *RegionRequestRuntimeStats { return &RegionRequestRuntimeStats{ - RPCStats: make(map[tikvrpc.CmdType]*RPCRuntimeStats), + RPCStatsList: make([]RPCRuntimeStats, 0, 1), } } // RPCRuntimeStats indicates the RPC request count and consume time. type RPCRuntimeStats struct { - Count int64 + Cmd tikvrpc.CmdType + Count uint32 // Send region request consume time. - Consume int64 + Consume time.Duration } // RecordRPCRuntimeStats uses to record the rpc count and duration stats. func (r *RegionRequestRuntimeStats) RecordRPCRuntimeStats(cmd tikvrpc.CmdType, d time.Duration) { - stat, ok := r.RPCStats[cmd] - if !ok { - r.RPCStats[cmd] = &RPCRuntimeStats{ - Count: 1, - Consume: int64(d), + for i := range r.RPCStatsList { + if r.RPCStatsList[i].Cmd == cmd { + r.RPCStatsList[i].Count++ + r.RPCStatsList[i].Consume += d + return + } + } + r.RPCStatsList = append(r.RPCStatsList, RPCRuntimeStats{ + Cmd: cmd, + Count: 1, + Consume: d, + }) +} + +// GetRPCStatsCount returns the total rpc types count. +func (r *RegionRequestRuntimeStats) GetRPCStatsCount() int { + return len(r.RPCStatsList) +} + +// GetCmdRPCCount returns the rpc count of the specified cmd type. +func (r *RegionRequestRuntimeStats) GetCmdRPCCount(cmd tikvrpc.CmdType) uint32 { + for i := range r.RPCStatsList { + if r.RPCStatsList[i].Cmd == cmd { + return r.RPCStatsList[i].Count } - return } - stat.Count++ - stat.Consume += int64(d) + return 0 } // RecordRPCErrorStats uses to record the request error(region error label and rpc error) info and count. @@ -198,15 +217,15 @@ func (r *RegionRequestRuntimeStats) String() string { return "" } var builder strings.Builder - for k, v := range r.RPCStats { + for _, v := range r.RPCStatsList { if builder.Len() > 0 { builder.WriteByte(',') } - builder.WriteString(k.String()) + builder.WriteString(v.Cmd.String()) builder.WriteString(":{num_rpc:") - builder.WriteString(strconv.FormatInt(v.Count, 10)) + builder.WriteString(strconv.FormatUint(uint64(v.Count), 10)) builder.WriteString(", total_time:") - builder.WriteString(util.FormatDuration(time.Duration(v.Consume))) + builder.WriteString(util.FormatDuration(v.Consume)) builder.WriteString("}") } if errStatsStr := r.RequestErrorStats.String(); errStatsStr != "" { @@ -242,7 +261,8 @@ func (r *RequestErrorStats) String() string { // Clone returns a copy of itself. func (r *RegionRequestRuntimeStats) Clone() *RegionRequestRuntimeStats { newRs := NewRegionRequestRuntimeStats() - maps.Copy(newRs.RPCStats, r.RPCStats) + newRs.RPCStatsList = make([]RPCRuntimeStats, 0, len(r.RPCStatsList)) + newRs.RPCStatsList = append(newRs.RPCStatsList, r.RPCStatsList...) if len(r.ErrStats) > 0 { newRs.ErrStats = make(map[string]int) maps.Copy(newRs.ErrStats, r.ErrStats) @@ -256,17 +276,8 @@ func (r *RegionRequestRuntimeStats) Merge(rs *RegionRequestRuntimeStats) { if rs == nil { return } - for cmd, v := range rs.RPCStats { - stat, ok := r.RPCStats[cmd] - if !ok { - r.RPCStats[cmd] = &RPCRuntimeStats{ - Count: v.Count, - Consume: v.Consume, - } - continue - } - stat.Count += v.Count - stat.Consume += v.Consume + for i := range rs.RPCStatsList { + r.mergeRPCRuntimeStats(rs.RPCStatsList[i]) } if len(rs.ErrStats) > 0 { if r.ErrStats == nil { @@ -279,6 +290,17 @@ func (r *RegionRequestRuntimeStats) Merge(rs *RegionRequestRuntimeStats) { } } +func (r *RegionRequestRuntimeStats) mergeRPCRuntimeStats(rs RPCRuntimeStats) { + for i := range r.RPCStatsList { + if r.RPCStatsList[i].Cmd == rs.Cmd { + r.RPCStatsList[i].Count += rs.Count + r.RPCStatsList[i].Consume += rs.Consume + return + } + } + r.RPCStatsList = append(r.RPCStatsList, rs) +} + // ReplicaAccessStats records the replica access info. type ReplicaAccessStats struct { // AccessInfos records the access info diff --git a/internal/locate/region_request3_test.go b/internal/locate/region_request3_test.go index eec6e1eaa..1459aa88b 100644 --- a/internal/locate/region_request3_test.go +++ b/internal/locate/region_request3_test.go @@ -1168,10 +1168,10 @@ func (s *testRegionRequestToThreeStoresSuite) TestSendReqFirstTimeout() { regionErr, err := resp.GetRegionError() s.Nil(err) s.True(IsFakeRegionError(regionErr)) - s.Equal(1, len(s.regionRequestSender.Stats.RPCStats)) - s.Equal(int64(3), s.regionRequestSender.Stats.RPCStats[tikvrpc.CmdGet].Count) // 3 rpc - s.Equal(3, len(reqTargetAddrs)) // each rpc to a different store. - s.Equal(0, bo.GetTotalBackoffTimes()) // no backoff since fast retry. + s.Equal(1, s.regionRequestSender.Stats.GetRPCStatsCount()) + s.Equal(uint32(3), s.regionRequestSender.Stats.GetCmdRPCCount(tikvrpc.CmdGet)) // 3 rpc + s.Equal(3, len(reqTargetAddrs)) // each rpc to a different store. + s.Equal(0, bo.GetTotalBackoffTimes()) // no backoff since fast retry. // warn: must rest MaxExecutionDurationMs before retry. resetStats() if staleRead { @@ -1187,9 +1187,9 @@ func (s *testRegionRequestToThreeStoresSuite) TestSendReqFirstTimeout() { s.Nil(err) s.Nil(regionErr) s.Equal([]byte("value"), resp.Resp.(*kvrpcpb.GetResponse).Value) - s.Equal(1, len(s.regionRequestSender.Stats.RPCStats)) - s.Equal(int64(1), s.regionRequestSender.Stats.RPCStats[tikvrpc.CmdGet].Count) // 1 rpc - s.Equal(0, bo.GetTotalBackoffTimes()) // no backoff since fast retry. + s.Equal(1, s.regionRequestSender.Stats.GetRPCStatsCount()) + s.Equal(uint32(1), s.regionRequestSender.Stats.GetCmdRPCCount(tikvrpc.CmdGet)) // 1 rpc + s.Equal(0, bo.GetTotalBackoffTimes()) // no backoff since fast retry. } } @@ -1398,9 +1398,9 @@ func (s *testRegionRequestToThreeStoresSuite) TestStaleReadTryFollowerAfterTimeo s.Nil(err) s.Nil(regionErr) s.Equal([]byte("value"), resp.Resp.(*kvrpcpb.GetResponse).Value) - s.Equal(1, len(s.regionRequestSender.Stats.RPCStats)) - s.Equal(int64(2), s.regionRequestSender.Stats.RPCStats[tikvrpc.CmdGet].Count) // 2 rpc - s.Equal(0, bo.GetTotalBackoffTimes()) // no backoff since fast retry. + s.Equal(1, s.regionRequestSender.Stats.GetRPCStatsCount()) + s.Equal(uint32(2), s.regionRequestSender.Stats.GetCmdRPCCount(tikvrpc.CmdGet)) // 2 rpc + s.Equal(0, bo.GetTotalBackoffTimes()) // no backoff since fast retry. } func (s *testRegionRequestToThreeStoresSuite) TestDoNotTryUnreachableLeader() { diff --git a/txnkv/txnsnapshot/snapshot.go b/txnkv/txnsnapshot/snapshot.go index 3135cf62e..6d5abfc1c 100644 --- a/txnkv/txnsnapshot/snapshot.go +++ b/txnkv/txnsnapshot/snapshot.go @@ -37,9 +37,9 @@ package txnsnapshot import ( "bytes" "context" - "fmt" "math" "runtime" + "strconv" "sync" "sync/atomic" "time" @@ -574,7 +574,7 @@ func (s *KVSnapshot) batchGetSingleRegion(bo *retry.Backoffer, batch batchKeys, resolveLocksOpts := txnlock.ResolveLocksOptions{ CallerStartTS: s.version, Locks: locks, - Detail: s.getResolveLockDetail(), + Detail: s.GetResolveLockDetail(), } resolveLocksRes, err := cli.ResolveLocksWithOpts(bo, resolveLocksOpts) msBeforeExpired := resolveLocksRes.TTL @@ -795,7 +795,7 @@ func (s *KVSnapshot) get(ctx context.Context, bo *retry.Backoffer, k []byte) ([] resolveLocksOpts := txnlock.ResolveLocksOptions{ CallerStartTS: s.version, Locks: locks, - Detail: s.getResolveLockDetail(), + Detail: s.GetResolveLockDetail(), } resolveLocksRes, err := cli.ResolveLocksWithOpts(bo, resolveLocksOpts) if err != nil { @@ -820,17 +820,6 @@ func (s *KVSnapshot) mergeExecDetail(detail *kvrpcpb.ExecDetailsV2) { if detail == nil || s.mu.stats == nil { return } - if s.mu.stats.resolveLockDetail == nil { - s.mu.stats.resolveLockDetail = &util.ResolveLockDetail{} - } - if s.mu.stats.scanDetail == nil { - s.mu.stats.scanDetail = &util.ScanDetail{ - ResolveLock: s.mu.stats.resolveLockDetail, - } - } - if s.mu.stats.timeDetail == nil { - s.mu.stats.timeDetail = &util.TimeDetail{} - } s.mu.stats.scanDetail.MergeFromScanDetailV2(detail.ScanDetailV2) s.mu.stats.timeDetail.MergeFromTimeDetail(detail.TimeDetailV2, detail.TimeDetail) } @@ -1113,13 +1102,14 @@ func (s *KVSnapshot) GetKVReadTimeout() time.Duration { return s.readTimeout } -func (s *KVSnapshot) getResolveLockDetail() *util.ResolveLockDetail { +// GetResolveLockDetail returns ResolveLockDetail, exports for testing. +func (s *KVSnapshot) GetResolveLockDetail() *util.ResolveLockDetail { s.mu.RLock() defer s.mu.RUnlock() if s.mu.stats == nil { return nil } - return s.mu.stats.resolveLockDetail + return &s.mu.stats.resolveLockDetail } // SetPipelined sets the snapshot to pipelined mode. @@ -1138,14 +1128,18 @@ type SnapshotRuntimeStats struct { rpcStats *locate.RegionRequestRuntimeStats backoffSleepMS map[string]int backoffTimes map[string]int - scanDetail *util.ScanDetail - timeDetail *util.TimeDetail - resolveLockDetail *util.ResolveLockDetail + scanDetail util.ScanDetail + timeDetail util.TimeDetail + resolveLockDetail util.ResolveLockDetail } // Clone implements the RuntimeStats interface. func (rs *SnapshotRuntimeStats) Clone() *SnapshotRuntimeStats { - newRs := SnapshotRuntimeStats{} + newRs := SnapshotRuntimeStats{ + scanDetail: rs.scanDetail, + timeDetail: rs.timeDetail, + resolveLockDetail: rs.resolveLockDetail, + } if rs.rpcStats != nil { newRs.rpcStats = rs.rpcStats.Clone() } @@ -1159,19 +1153,6 @@ func (rs *SnapshotRuntimeStats) Clone() *SnapshotRuntimeStats { newRs.backoffTimes[k] += v } } - - if rs.scanDetail != nil { - newRs.scanDetail = rs.scanDetail - } - - if rs.timeDetail != nil { - newRs.timeDetail = rs.timeDetail - } - - if rs.resolveLockDetail != nil { - newRs.resolveLockDetail = rs.resolveLockDetail - } - return &newRs } @@ -1197,6 +1178,9 @@ func (rs *SnapshotRuntimeStats) Merge(other *SnapshotRuntimeStats) { rs.backoffTimes[k] += v } } + rs.scanDetail.Merge(&other.scanDetail) + rs.timeDetail.Merge(&other.timeDetail) + rs.resolveLockDetail.Merge(&other.resolveLockDetail) } // String implements fmt.Stringer interface. @@ -1211,13 +1195,23 @@ func (rs *SnapshotRuntimeStats) String() string { } ms := rs.backoffSleepMS[k] d := time.Duration(ms) * time.Millisecond - buf.WriteString(fmt.Sprintf("%s_backoff:{num:%d, total_time:%s}", k, v, util.FormatDuration(d))) + buf.WriteString(k) + buf.WriteString("_backoff:{num:") + buf.WriteString(strconv.Itoa(v)) + buf.WriteString(", total_time:") + buf.WriteString(util.FormatDuration(d)) + buf.WriteString("}") } timeDetail := rs.timeDetail.String() if timeDetail != "" { buf.WriteString(", ") buf.WriteString(timeDetail) } + if rs.resolveLockDetail.ResolveLockTime > 0 { + buf.WriteString(", ") + buf.WriteString("resolve_lock_time:") + buf.WriteString(util.FormatDuration(time.Duration(rs.resolveLockDetail.ResolveLockTime))) + } scanDetail := rs.scanDetail.String() if scanDetail != "" { buf.WriteString(", ") @@ -1228,19 +1222,14 @@ func (rs *SnapshotRuntimeStats) String() string { // GetTimeDetail returns the timeDetail func (rs *SnapshotRuntimeStats) GetTimeDetail() *util.TimeDetail { - return rs.timeDetail + return &rs.timeDetail } // GetCmdRPCCount returns the count of the corresponding kind of rpc requests func (rs *SnapshotRuntimeStats) GetCmdRPCCount(cmd tikvrpc.CmdType) int64 { - if rs.rpcStats == nil || len(rs.rpcStats.RPCStats) == 0 { - return 0 - } - - stats, ok := rs.rpcStats.RPCStats[cmd] - if !ok { + if rs.rpcStats == nil { return 0 } - return stats.Count + return int64(rs.rpcStats.GetCmdRPCCount(cmd)) } diff --git a/util/execdetails.go b/util/execdetails.go index 74f3d2c09..5977ee9a4 100644 --- a/util/execdetails.go +++ b/util/execdetails.go @@ -417,8 +417,6 @@ type ScanDetail struct { RocksdbBlockReadDuration time.Duration // GetSnapshotDuration is the time spent getting an engine snapshot. GetSnapshotDuration time.Duration - - ResolveLock *ResolveLockDetail } // Merge merges scan detail execution details into self.