Skip to content

Commit

Permalink
checker, statistic: avoid leak in label statistic (#8703) (#8735)
Browse files Browse the repository at this point in the history
close #8700

Signed-off-by: lhy1024 <[email protected]>

Co-authored-by: lhy1024 <[email protected]>
  • Loading branch information
ti-chi-bot and lhy1024 authored Oct 22, 2024
1 parent 2d8f681 commit fc6e45c
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func HandleOverlaps(ctx context.Context, c Cluster, overlaps []*core.RegionInfo)
if c.GetRegionStats() != nil {
c.GetRegionStats().ClearDefunctRegion(item.GetID())
}
c.GetLabelStats().ClearDefunctRegion(item.GetID())
c.GetLabelStats().MarkDefunctRegion(item.GetID())
c.GetRuleManager().InvalidCache(item.GetID())
}
}
Expand Down
3 changes: 1 addition & 2 deletions pkg/mcs/scheduling/server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,13 +379,12 @@ func (c *Cluster) waitSchedulersInitialized() {
}
}

// TODO: implement the following methods

// UpdateRegionsLabelLevelStats updates the status of the region label level by types.
func (c *Cluster) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) {
for _, region := range regions {
c.labelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.persistConfig.GetLocationLabels())
}
c.labelStats.ClearDefunctRegions()
}

func (c *Cluster) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo {
Expand Down
24 changes: 19 additions & 5 deletions pkg/statistics/region_collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,15 @@ type LabelStatistics struct {
syncutil.RWMutex
regionLabelStats map[uint64]string
labelCounter map[string]int
defunctRegions map[uint64]struct{}
}

// NewLabelStatistics creates a new LabelStatistics.
func NewLabelStatistics() *LabelStatistics {
return &LabelStatistics{
regionLabelStats: make(map[uint64]string),
labelCounter: make(map[string]int),
defunctRegions: make(map[uint64]struct{}),
}
}

Expand Down Expand Up @@ -405,14 +407,26 @@ func ResetLabelStatsMetrics() {
regionLabelLevelGauge.Reset()
}

// ClearDefunctRegion is used to handle the overlap region.
func (l *LabelStatistics) ClearDefunctRegion(regionID uint64) {
// MarkDefunctRegion is used to handle the overlap region.
// It is used to mark the region as defunct and remove it from the label statistics later.
func (l *LabelStatistics) MarkDefunctRegion(regionID uint64) {
l.Lock()
defer l.Unlock()
if label, ok := l.regionLabelStats[regionID]; ok {
l.labelCounter[label]--
delete(l.regionLabelStats, regionID)
l.defunctRegions[regionID] = struct{}{}
}

// ClearDefunctRegions is used to handle the overlap region.
// It is used to remove the defunct regions from the label statistics.
func (l *LabelStatistics) ClearDefunctRegions() {
l.Lock()
defer l.Unlock()
for regionID := range l.defunctRegions {
if label, ok := l.regionLabelStats[regionID]; ok {
l.labelCounter[label]--
delete(l.regionLabelStats, regionID)
}
}
l.defunctRegions = make(map[uint64]struct{})
}

// GetLabelCounter is only used for tests.
Expand Down
36 changes: 33 additions & 3 deletions server/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) {
opt.SetReplicationConfig(cfg)
re.NoError(err)
cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend())
cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil)

for i := uint64(1); i <= 4; i++ {
var labels []*metapb.StoreLabel
Expand Down Expand Up @@ -1159,13 +1160,42 @@ func TestRegionLabelIsolationLevel(t *testing.T) {
StartKey: []byte{byte(1)},
EndKey: []byte{byte(2)},
}
r := core.NewRegionInfo(region, peers[0])
re.NoError(cluster.putRegion(r))
r1 := core.NewRegionInfo(region, peers[0])
re.NoError(cluster.putRegion(r1))

cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r})
cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r1})
counter := cluster.labelStats.GetLabelCounter()
re.Equal(0, counter["none"])
re.Equal(1, counter["zone"])

region = &metapb.Region{
Id: 10,
Peers: peers,
StartKey: []byte{byte(2)},
EndKey: []byte{byte(3)},
}
r2 := core.NewRegionInfo(region, peers[0])
re.NoError(cluster.putRegion(r2))

cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r2})
counter = cluster.labelStats.GetLabelCounter()
re.Equal(0, counter["none"])
re.Equal(2, counter["zone"])

// issue: https://github.com/tikv/pd/issues/8700
// step1: heartbeat a overlap region, which is used to simulate the case that the region is merged.
// step2: update region 9 and region 10, which is used to simulate the case that patrol is triggered.
// We should only count region 9.
overlapRegion := r1.Clone(
core.WithStartKey(r1.GetStartKey()),
core.WithEndKey(r2.GetEndKey()),
core.WithLeader(r2.GetPeer(8)),
)
re.NoError(cluster.HandleRegionHeartbeat(overlapRegion))
cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r1, r2})
counter = cluster.labelStats.GetLabelCounter()
re.Equal(0, counter["none"])
re.Equal(1, counter["zone"])
}

func heartbeatRegions(re *require.Assertions, cluster *RaftCluster, regions []*core.RegionInfo) {
Expand Down
1 change: 1 addition & 0 deletions server/cluster/scheduling_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ func (sc *schedulingController) UpdateRegionsLabelLevelStats(regions []*core.Reg
for _, region := range regions {
sc.labelStats.Observe(region, sc.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), sc.opt.GetLocationLabels())
}
sc.labelStats.ClearDefunctRegions()
}

func (sc *schedulingController) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo {
Expand Down

0 comments on commit fc6e45c

Please sign in to comment.