From 3029ea65c4fc97d2222160b2fa8b4e25407648ba Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Fri, 10 Jan 2025 12:58:40 +0800 Subject: [PATCH] planner: correct plan when scan tidb related cluster table with KeepOrder (#51922) (#58846) close pingcap/tidb#51723 --- pkg/planner/core/find_best_task.go | 4 + pkg/planner/core/task.go | 76 +++++++++++++++++++ .../r/infoschema/cluster_tables.result | 48 ++++++++++++ .../t/infoschema/cluster_tables.test | 10 +++ 4 files changed, 138 insertions(+) diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index f31bbf1f15841..35bd029dc6782 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -790,6 +790,10 @@ func compareCandidates(sctx sessionctx.Context, prop *property.PhysicalProperty, } func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.PhysicalProperty) bool { + if ds.table.Type().IsClusterTable() && !prop.IsSortItemEmpty() { + // TableScan with cluster table can't keep order. 
+		return false
+	}
 	var isMatchProp bool
 	if path.IsIntHandlePath {
 		pkCol := ds.getPKIsHandleCol()
diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go
index ae47862b67e02..8dffd4d23d43a 100644
--- a/pkg/planner/core/task.go
+++ b/pkg/planner/core/task.go
@@ -21,6 +21,7 @@ import (
 	"github.com/pingcap/failpoint"
 	"github.com/pingcap/tidb/pkg/expression"
 	"github.com/pingcap/tidb/pkg/expression/aggregation"
+	"github.com/pingcap/tidb/pkg/infoschema"
 	"github.com/pingcap/tidb/pkg/kv"
 	"github.com/pingcap/tidb/pkg/parser/ast"
 	"github.com/pingcap/tidb/pkg/parser/charset"
@@ -1174,6 +1175,75 @@ func (p *PhysicalTopN) canPushDownToTiFlash(mppTask *mppTask) bool {
 	return true
 }
 
+// pushLimitDownToTiDBCop handles https://github.com/pingcap/tidb/issues/51723.
+// It only supports `CLUSTER_SLOW_QUERY` sorted by its first handle column and rewrites
+// TopN -> TableReader -> TableFullScan[cop] to
+// TopN -> TableReader -> Limit[cop] -> TableFullScan[cop] + keepOrder.
+// It returns (nil, false) and leaves copTsk untouched when the rewrite does not apply.
+func (p *PhysicalTopN) pushLimitDownToTiDBCop(copTsk *copTask) (task, bool) {
+	if copTsk.indexPlan != nil || copTsk.tablePlan == nil {
+		return nil, false
+	}
+
+	var (
+		selOnTblScan   *PhysicalSelection
+		selSelectivity float64
+		tblScan        *PhysicalTableScan
+		ok             bool
+	)
+
+	clonedPlan, err := copTsk.tablePlan.Clone()
+	if err != nil {
+		return nil, false
+	}
+	finalTblScanPlan := clonedPlan
+	for len(finalTblScanPlan.Children()) > 0 {
+		selOnTblScan, _ = finalTblScanPlan.(*PhysicalSelection)
+		finalTblScanPlan = finalTblScanPlan.Children()[0]
+	}
+
+	if tblScan, ok = finalTblScanPlan.(*PhysicalTableScan); !ok {
+		return nil, false
+	}
+
+	// Check the table is `CLUSTER_SLOW_QUERY` or not.
+	if tblScan.Table.Name.O != infoschema.ClusterTableSlowLog {
+		return nil, false
+	}
+
+	colsProp, ok := GetPropByOrderByItems(p.ByItems)
+	if !ok {
+		return nil, false
+	}
+	if len(colsProp.SortItems) != 1 || !colsProp.SortItems[0].Col.Equal(p.SCtx(), tblScan.HandleCols.GetCol(0)) {
+		return nil, false
+	}
+	if selOnTblScan != nil && tblScan.StatsInfo().RowCount > 0 {
+		selSelectivity = selOnTblScan.StatsInfo().RowCount / tblScan.StatsInfo().RowCount
+	}
+	copTsk.tablePlan = clonedPlan // install the clone only now, so a failed check or Clone error never clobbers the caller's task
+	tblScan.Desc = colsProp.SortItems[0].Desc
+	tblScan.KeepOrder = true
+
+	childProfile := copTsk.plan().StatsInfo()
+	newCount := p.Offset + p.Count
+	stats := deriveLimitStats(childProfile, float64(newCount))
+	pushedLimit := PhysicalLimit{
+		Count: newCount,
+	}.Init(p.SCtx(), stats, p.SelectBlockOffset())
+	pushedLimit.SetSchema(copTsk.tablePlan.Schema())
+	copTsk = attachPlan2Task(pushedLimit, copTsk).(*copTask)
+	child := pushedLimit.Children()[0]
+	child.SetStats(child.StatsInfo().ScaleByExpectCnt(float64(newCount)))
+	if selSelectivity > 0 && selSelectivity < 1 {
+		scaledRowCount := child.StatsInfo().RowCount / selSelectivity
+		tblScan.SetStats(tblScan.StatsInfo().ScaleByExpectCnt(scaledRowCount))
+	}
+	rootTask := copTsk.convertToRootTask(p.SCtx())
+	return attachPlan2Task(p, rootTask), true
+}
+
+// attach2Task implements the PhysicalPlan interface.
 func (p *PhysicalTopN) attach2Task(tasks ...task) task {
 	t := tasks[0].copy()
 	cols := make([]*expression.Column, 0, len(p.ByItems))
@@ -1181,6 +1251,12 @@ func (p *PhysicalTopN) attach2Task(tasks ...task) task {
 		cols = append(cols, expression.ExtractColumns(item.Expr)...)
} needPushDown := len(cols) > 0 + if copTask, ok := t.(*copTask); ok && needPushDown && copTask.getStoreType() == kv.TiDB && len(copTask.rootTaskConds) == 0 { + newTask, changed := p.pushLimitDownToTiDBCop(copTask) + if changed { + return newTask + } + } if copTask, ok := t.(*copTask); ok && needPushDown && p.canPushDownToTiKV(copTask) && len(copTask.rootTaskConds) == 0 { // If all columns in topN are from index plan, we push it to index plan, otherwise we finish the index plan and // push it to table plan. diff --git a/tests/integrationtest/r/infoschema/cluster_tables.result b/tests/integrationtest/r/infoschema/cluster_tables.result index bca044ad116a9..feba3f5daedc8 100644 --- a/tests/integrationtest/r/infoschema/cluster_tables.result +++ b/tests/integrationtest/r/infoschema/cluster_tables.result @@ -10,3 +10,51 @@ select /*+ ignore_index(t, a) */ * from t where a = 1; id a create session binding from history using plan digest '20cf414ff6bd6fff3de17a266966020e81099b9fd1a29c4fd4b8aaf212f5c2c0'; drop binding for sql digest '83de0854921816c038565229b8008f5d679d373d16bf6b2a5cacd5937e11ea21'; +explain select * from information_schema.cluster_slow_query order by time limit 1; +id estRows task access object operator info +TopN_7 1.00 root information_schema.cluster_slow_query.time, offset:0, count:1 +└─TableReader_16 1.00 root data:Limit_15 + └─Limit_15 1.00 cop[tidb] offset:0, count:1 + └─TableFullScan_14 1.00 cop[tidb] table:CLUSTER_SLOW_QUERY keep order:true, stats:pseudo +explain select * from information_schema.cluster_slow_query order by time; +id estRows task access object operator info +Sort_4 10000.00 root information_schema.cluster_slow_query.time +└─TableReader_8 10000.00 root data:TableFullScan_7 + └─TableFullScan_7 10000.00 cop[tidb] table:CLUSTER_SLOW_QUERY keep order:false, stats:pseudo +explain select * from information_schema.cluster_slow_query order by time desc limit 1; +id estRows task access object operator info +TopN_7 1.00 root 
information_schema.cluster_slow_query.time:desc, offset:0, count:1 +└─TableReader_16 1.00 root data:Limit_15 + └─Limit_15 1.00 cop[tidb] offset:0, count:1 + └─TableFullScan_14 1.00 cop[tidb] table:CLUSTER_SLOW_QUERY keep order:true, desc, stats:pseudo +explain select * from information_schema.cluster_slow_query order by time desc; +id estRows task access object operator info +Sort_4 10000.00 root information_schema.cluster_slow_query.time:desc +└─TableReader_8 10000.00 root data:TableFullScan_7 + └─TableFullScan_7 10000.00 cop[tidb] table:CLUSTER_SLOW_QUERY keep order:false, stats:pseudo +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time limit 1; +id estRows task access object operator info +TopN_8 1.00 root information_schema.cluster_slow_query.time, offset:0, count:1 +└─TableReader_18 1.00 root data:Limit_17 + └─Limit_17 1.00 cop[tidb] offset:0, count:1 + └─Selection_16 1.00 cop[tidb] ne(information_schema.cluster_slow_query.query, "x") + └─TableRangeScan_15 1.50 cop[tidb] table:CLUSTER_SLOW_QUERY range:[2020-09-24 15:23:41.421396,2020-09-25 17:57:35.047111], keep order:true, stats:pseudo +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time; +id estRows task access object operator info +Sort_5 166.42 root information_schema.cluster_slow_query.time +└─TableReader_10 166.42 root data:Selection_9 + └─Selection_9 166.42 cop[tidb] ne(information_schema.cluster_slow_query.query, "x") + └─TableRangeScan_8 250.00 cop[tidb] table:CLUSTER_SLOW_QUERY range:[2020-09-24 15:23:41.421396,2020-09-25 17:57:35.047111], keep order:false, stats:pseudo +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time desc limit 1; +id 
estRows task access object operator info +TopN_8 1.00 root information_schema.cluster_slow_query.time:desc, offset:0, count:1 +└─TableReader_18 1.00 root data:Limit_17 + └─Limit_17 1.00 cop[tidb] offset:0, count:1 + └─Selection_16 1.00 cop[tidb] ne(information_schema.cluster_slow_query.query, "x") + └─TableRangeScan_15 1.50 cop[tidb] table:CLUSTER_SLOW_QUERY range:[2020-09-24 15:23:41.421396,2020-09-25 17:57:35.047111], keep order:true, desc, stats:pseudo +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time desc; +id estRows task access object operator info +Sort_5 166.42 root information_schema.cluster_slow_query.time:desc +└─TableReader_10 166.42 root data:Selection_9 + └─Selection_9 166.42 cop[tidb] ne(information_schema.cluster_slow_query.query, "x") + └─TableRangeScan_8 250.00 cop[tidb] table:CLUSTER_SLOW_QUERY range:[2020-09-24 15:23:41.421396,2020-09-25 17:57:35.047111], keep order:false, stats:pseudo diff --git a/tests/integrationtest/t/infoschema/cluster_tables.test b/tests/integrationtest/t/infoschema/cluster_tables.test index 020221645c7d9..2ef15d299b93a 100644 --- a/tests/integrationtest/t/infoschema/cluster_tables.test +++ b/tests/integrationtest/t/infoschema/cluster_tables.test @@ -10,3 +10,13 @@ select /*+ ignore_index(t, a) */ * from t where a = 1; create session binding from history using plan digest '20cf414ff6bd6fff3de17a266966020e81099b9fd1a29c4fd4b8aaf212f5c2c0'; drop binding for sql digest '83de0854921816c038565229b8008f5d679d373d16bf6b2a5cacd5937e11ea21'; +# TestIssue51723 +explain select * from information_schema.cluster_slow_query order by time limit 1; +explain select * from information_schema.cluster_slow_query order by time; +explain select * from information_schema.cluster_slow_query order by time desc limit 1; +explain select * from information_schema.cluster_slow_query order by time desc; +explain select * from 
information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time limit 1; +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time; +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time desc limit 1; +explain select * from information_schema.cluster_slow_query WHERE (time between '2020-09-24 15:23:41.421396' and '2020-09-25 17:57:35.047111') and query != 'x' order by time desc; +