Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: the performance of calculating index selectivity #1938

Merged
merged 17 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 37 additions & 40 deletions sqle/driver/mysql/audit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4006,34 +4006,39 @@ func TestCheckIndexOption(t *testing.T) {
assert.NoError(t, err)

inspect1 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_3")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("100.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM (SELECT v1 FROM exist_db.exist_tb_3 LIMIT 50000) t;`)).
WillReturnRows(
sqlmock.NewRows([]string{"v1"}).AddRow("100.0000"),
)
runSingleRuleInspectCase(rule, t, "", inspect1, "alter table exist_tb_3 add primary key (v1);", newTestResult())

inspect2 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_3")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("100.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM (SELECT v1 FROM exist_db.exist_tb_3 LIMIT 50000) t;`)).
WillReturnRows(
sqlmock.NewRows([]string{"v1"}).AddRow("100.0000"),
)
runSingleRuleInspectCase(rule, t, "", inspect2, "alter table exist_tb_3 add unique(v1);", newTestResult())

inspect3 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v2 ) ) / COUNT( * ) * 100 AS v2 FROM exist_tb_3")).
WillReturnRows(sqlmock.NewRows([]string{"v2"}).
AddRow("30.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT COUNT( DISTINCT ( v2 ) ) / COUNT( * ) * 100 AS v2 FROM (SELECT v2 FROM exist_db.exist_tb_3 LIMIT 50000) t;`)).
WillReturnRows(
sqlmock.NewRows([]string{"v2"}).AddRow("30.0000"),
)
runSingleRuleInspectCase(rule, t, "", inspect3, "alter table exist_tb_3 add index idx_c2(v2);",
newTestResult().addResult(rulepkg.DDLCheckIndexOption, "v2", 70))

inspect4 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v3 ) ) / COUNT( * ) * 100 AS v3 FROM exist_tb_3")).
WillReturnRows(sqlmock.NewRows([]string{"v3"}).
AddRow("70.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT COUNT( DISTINCT ( v3 ) ) / COUNT( * ) * 100 AS v3 FROM (SELECT v3 FROM exist_db.exist_tb_3 LIMIT 50000) t;`)).
WillReturnRows(
sqlmock.NewRows([]string{"v3"}).AddRow("70.0000"),
)
runSingleRuleInspectCase(rule, t, "", inspect4, "alter table exist_tb_3 add fulltext(v3);", newTestResult())

inspect5 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1,COUNT( DISTINCT ( v2 ) ) / COUNT( * ) * 100 AS v2 FROM exist_tb_3")).
WillReturnRows(sqlmock.NewRows([]string{"v1", "v2"}).
AddRow("100.0000", "30.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1,COUNT( DISTINCT ( v2 ) ) / COUNT( * ) * 100 AS v2 FROM (SELECT v1,v2 FROM exist_db.exist_tb_3 LIMIT 50000) t;`)).
WillReturnRows(
sqlmock.NewRows([]string{"v1"}).AddRow("100.0000"),
)
runSingleRuleInspectCase(rule, t, "", inspect5, "alter table exist_tb_3 add index idx_c1_c2(v1,v2);", newTestResult())

}
Expand Down Expand Up @@ -5945,46 +5950,38 @@ func TestDMLCheckIndexSelectivity(t *testing.T) {
inspect1 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("select * from exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"key", "table"}).AddRow("v1", "exist_tb_6"))
handler.ExpectQuery(regexp.QuoteMeta("SHOW INDEX FROM exist_db.exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"Column_name", "Key_name"}).AddRow("v1", "v1"))
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("50.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT (s.CARDINALITY / t.TABLE_ROWS) * 100 AS INDEX_SELECTIVITY,s.INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS s JOIN INFORMATION_SCHEMA.TABLES t ON s.TABLE_SCHEMA = t.TABLE_SCHEMA AND s.TABLE_NAME = t.TABLE_NAME WHERE (s.TABLE_SCHEMA , s.TABLE_NAME , s.INDEX_NAME) IN (('exist_db', 'exist_tb_6', 'v1'));`)).
WillReturnRows(
sqlmock.NewRows([]string{"INDEX_SELECTIVITY", "INDEX_NAME"}).AddRow("50.0000", "v1"),
)
runSingleRuleInspectCase(rule, t, "", inspect1, "select * from exist_tb_6 where v1='10'", newTestResult().add(driverV2.RuleLevelError, rulepkg.DMLCheckIndexSelectivity, "索引:v1,未超过区分度阈值:70,建议使用超过阈值的索引。"))

inspect2 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("select * from exist_tb_6 where id in (select id from exist_tb_6 where v1='10')")).
WillReturnRows(sqlmock.NewRows([]string{"key", "table"}).AddRow("v1", "exist_tb_6").AddRow("primary", "exist_tb_6"))
handler.ExpectQuery(regexp.QuoteMeta("SHOW INDEX FROM exist_db.exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"Column_name", "Key_name"}).AddRow("v1", "v1").AddRow("primary", "id"))
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("50.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT (s.CARDINALITY / t.TABLE_ROWS) * 100 AS INDEX_SELECTIVITY,s.INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS s JOIN INFORMATION_SCHEMA.TABLES t ON s.TABLE_SCHEMA = t.TABLE_SCHEMA AND s.TABLE_NAME = t.TABLE_NAME WHERE (s.TABLE_SCHEMA , s.TABLE_NAME , s.INDEX_NAME) IN (('exist_db', 'exist_tb_6', 'v1'));`)).
WillReturnRows(sqlmock.NewRows([]string{"INDEX_SELECTIVITY", "INDEX_NAME"}).
AddRow("50.0000", "v1"))
runSingleRuleInspectCase(rule, t, "", inspect2, "select * from exist_tb_6 where id in (select id from exist_tb_6 where v1='10')", newTestResult().add(driverV2.RuleLevelError, rulepkg.DMLCheckIndexSelectivity, "索引:v1,未超过区分度阈值:70,建议使用超过阈值的索引。"))

inspect3 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("select * from exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"key", "table"}).AddRow("v1", "exist_tb_6"))
handler.ExpectQuery(regexp.QuoteMeta("SHOW INDEX FROM exist_db.exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"Column_name", "Key_name"}).AddRow("v1", "v1"))
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("80.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT (s.CARDINALITY / t.TABLE_ROWS) * 100 AS INDEX_SELECTIVITY,s.INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS s JOIN INFORMATION_SCHEMA.TABLES t ON s.TABLE_SCHEMA = t.TABLE_SCHEMA AND s.TABLE_NAME = t.TABLE_NAME WHERE (s.TABLE_SCHEMA , s.TABLE_NAME , s.INDEX_NAME) IN (('exist_db', 'exist_tb_6', 'v1'));`)).
WillReturnRows(sqlmock.NewRows([]string{"INDEX_SELECTIVITY", "INDEX_NAME"}).
AddRow("80.0000", "v1"))
runSingleRuleInspectCase(rule, t, "", inspect3, "select * from exist_tb_6 where v1='10'", newTestResult())

inspect4 := NewMockInspect(e)
handler.ExpectQuery(regexp.QuoteMeta("select * from exist_tb_6 where id in (select id from exist_tb_6 where v1='10')")).
WillReturnRows(sqlmock.NewRows([]string{"key", "table"}).AddRow("v1", "exist_tb_6"))
handler.ExpectQuery(regexp.QuoteMeta("SHOW INDEX FROM exist_db.exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"Column_name", "Key_name"}).AddRow("v1", "v1"))
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("80.0000"))
handler.ExpectQuery(regexp.QuoteMeta("SHOW INDEX FROM exist_db.exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"Column_name", "Key_name"}).AddRow("v1", "v1"))
handler.ExpectQuery(regexp.QuoteMeta("SELECT COUNT( DISTINCT ( v1 ) ) / COUNT( * ) * 100 AS v1 FROM exist_tb_6")).
WillReturnRows(sqlmock.NewRows([]string{"v1"}).
AddRow("80.0000"))
handler.ExpectQuery(regexp.QuoteMeta(`SELECT (s.CARDINALITY / t.TABLE_ROWS) * 100 AS INDEX_SELECTIVITY,s.INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS s JOIN INFORMATION_SCHEMA.TABLES t ON s.TABLE_SCHEMA = t.TABLE_SCHEMA AND s.TABLE_NAME = t.TABLE_NAME WHERE (s.TABLE_SCHEMA , s.TABLE_NAME , s.INDEX_NAME) IN (('exist_db', 'exist_tb_6', 'v1'));`)).
WillReturnRows(sqlmock.NewRows([]string{"INDEX_SELECTIVITY", "INDEX_NAME"}).
AddRow("80.0000", "v1"))

handler.ExpectQuery(regexp.QuoteMeta(`SELECT (s.CARDINALITY / t.TABLE_ROWS) * 100 AS INDEX_SELECTIVITY,s.INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS s JOIN INFORMATION_SCHEMA.TABLES t ON s.TABLE_SCHEMA = t.TABLE_SCHEMA AND s.TABLE_NAME = t.TABLE_NAME WHERE (s.TABLE_SCHEMA , s.TABLE_NAME , s.INDEX_NAME) IN (('exist_db', 'exist_tb_6', 'v1'));`)).
WillReturnRows(sqlmock.NewRows([]string{"INDEX_SELECTIVITY", "INDEX_NAME"}).
AddRow("80.0000", "v1"))
runSingleRuleInspectCase(rule, t, "", inspect4, "select * from exist_tb_6 where id in (select id from exist_tb_6 where v1='10')", newTestResult())

}
Expand Down
51 changes: 22 additions & 29 deletions sqle/driver/mysql/rule/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -5150,14 +5150,20 @@ func checkIndexOption(input *RuleHandlerInput) error {
if len(indexColumns) == 0 {
return nil
}
maxIndexOption, err := input.Ctx.GetMaxIndexOptionForTable(tableName, indexColumns)

columnSelectivityMap, err := input.Ctx.GetSelectivityOfColumns(tableName, indexColumns)
if err != nil {
return err
}
// todo: using number compare, don't use string compare
max := input.Rule.Params.GetParam(DefaultSingleParamKeyName).Int()

if maxIndexOption > 0 && float64(max) > maxIndexOption {
max := input.Rule.Params.GetParam(DefaultSingleParamKeyName).Int()
var maxSelectivity float64 = -1
for _, selectivity := range columnSelectivityMap {
if selectivity > maxSelectivity {
maxSelectivity = selectivity
}
}
if maxSelectivity > 0 && maxSelectivity < float64(max) {
addResult(input.Res, input.Rule, input.Rule.Name, strings.Join(indexColumns, ", "), max)
}
return nil
Expand Down Expand Up @@ -6535,31 +6541,22 @@ func checkColumnNotNull(input *RuleHandlerInput) error {
return nil
}

// getColumnFromIndexesInfoByIndexName collects the ColumnName of every entry
// in indexesInfo whose KeyName equals indexName, preserving input order.
// The result is always a non-nil slice; it is empty when no entry matches.
func getColumnFromIndexesInfoByIndexName(indexesInfo []*executor.TableIndexesInfo, indexName string) []string {
	columns := make([]string, 0)
	for i := range indexesInfo {
		entry := indexesInfo[i]
		if entry.KeyName != indexName {
			// Entry belongs to a different index; skip it.
			continue
		}
		columns = append(columns, entry.ColumnName)
	}
	return columns
}

func checkIndexSelectivity(input *RuleHandlerInput) error {
if _, ok := input.Node.(*ast.SelectStmt); !ok {
return nil
}
selectVisitor := &util.SelectVisitor{}
input.Node.Accept(selectVisitor)
epRecords, err := input.Ctx.GetExecutionPlan(input.Node.Text())
explainRecords, err := input.Ctx.GetExecutionPlan(input.Node.Text())
if err != nil {
log.NewEntry().Errorf("get execution plan failed, sqle: %v, error: %v", input.Node.Text(), err)
return nil
}
for _, record := range epRecords {
recordKey := record.Key
for _, record := range explainRecords {
indexes := strings.Split(record.Key, ",")
recordTable := record.Table
if recordKey == "" || recordTable == "" {
if len(indexes) == 0 || recordTable == "" {
// 若执行计划没有使用索引 则跳过
continue
}
for _, selectNode := range selectVisitor.SelectList {
Expand All @@ -6569,23 +6566,19 @@ func checkIndexSelectivity(input *RuleHandlerInput) error {
tables := util.GetTables(selectNode.From.TableRefs)
for _, tableName := range tables {
if tableName.Name.L != recordTable {
// 只检查 使用索引对应的表
continue
}
schemaName := input.Ctx.GetSchemaName(tableName)
indexesInfo, err := input.Ctx.GetTableIndexesInfo(schemaName, tableName.Name.O)
if err != nil {
continue
}
indexColumns := getColumnFromIndexesInfoByIndexName(indexesInfo, recordKey)
maxIndexOption, err := input.Ctx.GetMaxIndexOptionForTable(tableName, indexColumns)
indexSelectivityMap, err := input.Ctx.GetSelectivityOfIndex(tableName, indexes)
if err != nil {
continue
}
max := input.Rule.Params.GetParam(DefaultSingleParamKeyName).Int()

if maxIndexOption > 0 && float64(max) > maxIndexOption {
addResult(input.Res, input.Rule, input.Rule.Name, recordKey, max)
return nil
for indexName, selectivity := range indexSelectivityMap {
if selectivity > 0 && selectivity < float64(max) {
addResult(input.Res, input.Rule, input.Rule.Name, indexName, max)
return nil
}
}
}
}
Expand Down
Loading
Loading