diff --git a/docs/_posts/2006-01-02-analytic-functions.md b/docs/_posts/2006-01-02-analytic-functions.md index 7d2b35a8..d7ae800d 100644 --- a/docs/_posts/2006-01-02-analytic-functions.md +++ b/docs/_posts/2006-01-02-analytic-functions.md @@ -36,7 +36,7 @@ _order_by_clause_ : [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }}) Analytic Functions sort the result set by _order_by_clause_ and calculate values within each of groups partitioned by _partition_clause_. -If there is no _partition_clause_, then all records of result set are dealt with as one group. +If there is no _partition_clause_, then all records of the result set are dealt with as one group. ## Definitions @@ -69,7 +69,7 @@ _partition_clause_ _order_by_clause_ : [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }}) -Returns ranks of records in a group. +Return ranks of records in a group. ### DENSE_RANK @@ -85,5 +85,5 @@ _partition_clause_ _order_by_clause_ : [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }}) -Returns ranks of records without any gaps in the ranking in a group. +Return ranks of records without any gaps in the ranking in a group. diff --git a/docs/_posts/2006-01-02-command.md b/docs/_posts/2006-01-02-command.md index 22fd29de..f3843d96 100644 --- a/docs/_posts/2006-01-02-command.md +++ b/docs/_posts/2006-01-02-command.md @@ -51,11 +51,11 @@ csvq [global options] [subcommand [subcommand options]] ["query"|"statements"] --version, -v : Print the version -> First line of a CSV file is dealt with as header line. In case "--no-header" option passed, +> First line of a CSV file is dealt with as the header line. In case "--no-header" option passed, > fields are automatically named as "c" and following sequential number. (exam. "c1", "c2", "c3", ...) -> In most cases CSV fields are imported as string value, but no-quoted empty fields are imported as null. -> By using "--without-null" option, no-quoted empty fields are imported as empty string value. +> In most cases CSV fields are imported as string values, but no-quoted empty fields are imported as null. +> By using "--without-null" option, no-quoted empty fields are imported as empty string values. > Some of global options can be specified in statements by using [Set Flag Statements]({{ '/reference/flag.html' | relative_url }}). diff --git a/docs/_posts/2006-01-02-set-operators.md b/docs/_posts/2006-01-02-set-operators.md index db3719bf..918b5425 100644 --- a/docs/_posts/2006-01-02-set-operators.md +++ b/docs/_posts/2006-01-02-set-operators.md @@ -13,7 +13,7 @@ category: reference | [INTERSECT](#intersect) | Return the intersection of result sets | A set operation combines result sets retrieved by select queries into a single result set. -If the ALL keyword is specified, the result is distinguished. +If the ALL keyword is not specified, the result is distinguished. ## UNION {: #union} diff --git a/docs/_posts/2006-01-02-statement.md b/docs/_posts/2006-01-02-statement.md index c5e1caa7..e0e17c4b 100644 --- a/docs/_posts/2006-01-02-statement.md +++ b/docs/_posts/2006-01-02-statement.md @@ -20,7 +20,7 @@ A statements is terminated with a semicolon. Stetaments are processed sequentially for each statement. In statements, character case is ignored. -If you want to execute a sigle query, you can omit a terminal semicolon. +If you want to execute a single query, you can omit the terminal semicolon. ```bash # Execute a single query diff --git a/lib/query/analytic_function.go b/lib/query/analytic_function.go index 7d8a528b..38e2a443 100644 --- a/lib/query/analytic_function.go +++ b/lib/query/analytic_function.go @@ -2,26 +2,33 @@ package query import ( "errors" + "sync" "github.com/mithrandie/csvq/lib/parser" "github.com/mithrandie/csvq/lib/ternary" ) -var AnalyticFunctions = map[string]func(*View, []parser.Expression, []partitionValue) error{ - "ROW_NUMBER": RowNumber, - "RANK": Rank, - "DENSE_RANK": DenseRank, +var AnalyticFunctions map[string]func(*View, []parser.Expression, parser.AnalyticClause) error +var defineAnalyticFunctions sync.Once + +func DefineAnalyticFunctions() { + defineAnalyticFunctions.Do(func() { + AnalyticFunctions = map[string]func(*View, []parser.Expression, parser.AnalyticClause) error{ + "ROW_NUMBER": RowNumber, + "RANK": Rank, + "DENSE_RANK": DenseRank, + } + }) } type partitionValue struct { - values []parser.Primary - orderValues []parser.Primary - number float64 - rank float64 + partitionValues []parser.Primary + orderValues []parser.Primary + values map[string]float64 } func (pv partitionValue) match(values []parser.Primary) bool { - for i, v := range pv.values { + for i, v := range pv.partitionValues { if EquivalentTo(v, values[i]) != ternary.TRUE { return false } @@ -49,84 +56,121 @@ func (pv partitionValues) searchIndex(values []parser.Primary) int { return -1 } -func RowNumber(view *View, args []parser.Expression, partitinList []partitionValue) error { +func RowNumber(view *View, args []parser.Expression, clause parser.AnalyticClause) error { if args != nil { - return errors.New("function ROW_NUMBER takes no argument") + return errors.New("analytic function ROW_NUMBER takes no argument") } partitions := partitionValues{} + var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...) for i := range view.Records { + filter[0].RecordIndex = i + partitionValues, err := filter.evalValues(clause.PartitionValues()) + if err != nil { + return err + } + var idx int - if idx = partitions.searchIndex(partitinList[i].values); -1 < idx { - partitions[idx].number++ + if idx = partitions.searchIndex(partitionValues); -1 < idx { + partitions[idx].values["number"]++ } else { partitions = append(partitions, partitionValue{ - values: partitinList[i].values, - number: 1, + partitionValues: partitionValues, + values: map[string]float64{ + "number": 1, + }, }) idx = len(partitions) - 1 } - view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].number)))) + view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["number"])))) } return nil } -func Rank(view *View, args []parser.Expression, partitinList []partitionValue) error { +func Rank(view *View, args []parser.Expression, clause parser.AnalyticClause) error { if args != nil { - return errors.New("function RANK takes no argument") + return errors.New("analytic function RANK takes no argument") } partitions := partitionValues{} + var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...) for i := range view.Records { + filter[0].RecordIndex = i + partitionValues, err := filter.evalValues(clause.PartitionValues()) + if err != nil { + return err + } + + orderValues, err := filter.evalValues(clause.OrderValues()) + if err != nil { + return err + } + var idx int - if idx = partitions.searchIndex(partitinList[i].values); -1 < idx { - partitions[idx].number++ - if !partitions[idx].isSameRank(partitinList[i].orderValues) { - partitions[idx].rank = partitions[idx].number + if idx = partitions.searchIndex(partitionValues); -1 < idx { + partitions[idx].values["number"]++ + if !partitions[idx].isSameRank(orderValues) { + partitions[idx].values["rank"] = partitions[idx].values["number"] } } else { partitions = append(partitions, partitionValue{ - values: partitinList[i].values, - orderValues: partitinList[i].orderValues, - number: 1, - rank: 1, + partitionValues: partitionValues, + orderValues: orderValues, + values: map[string]float64{ + "number": 1, + "rank": 1, + }, }) idx = len(partitions) - 1 } - view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].rank)))) + view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["rank"])))) } return nil } -func DenseRank(view *View, args []parser.Expression, partitinList []partitionValue) error { +func DenseRank(view *View, args []parser.Expression, clause parser.AnalyticClause) error { if args != nil { - return errors.New("function DENSE_RANK takes no argument") + return errors.New("analytic function DENSE_RANK takes no argument") } partitions := partitionValues{} + var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...) for i := range view.Records { + filter[0].RecordIndex = i + partitionValues, err := filter.evalValues(clause.PartitionValues()) + if err != nil { + return err + } + + orderValues, err := filter.evalValues(clause.OrderValues()) + if err != nil { + return err + } + var idx int - if idx = partitions.searchIndex(partitinList[i].values); -1 < idx { - if !partitions[idx].isSameRank(partitinList[i].orderValues) { - partitions[idx].rank++ + if idx = partitions.searchIndex(partitionValues); -1 < idx { + if !partitions[idx].isSameRank(orderValues) { + partitions[idx].values["rank"]++ } } else { partitions = append(partitions, partitionValue{ - values: partitinList[i].values, - orderValues: partitinList[i].orderValues, - rank: 1, + partitionValues: partitionValues, + orderValues: orderValues, + values: map[string]float64{ + "rank": 1, + }, }) idx = len(partitions) - 1 } - view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].rank)))) + view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["rank"])))) } return nil diff --git a/lib/query/analytic_function_test.go b/lib/query/analytic_function_test.go index 58d5237b..9cd606bf 100644 --- a/lib/query/analytic_function_test.go +++ b/lib/query/analytic_function_test.go @@ -8,18 +8,18 @@ import ( ) type analyticFunctionTest struct { - Name string - View *View - Args []parser.Expression - PartitionList []partitionValue - Result *View - Error string + Name string + View *View + Args []parser.Expression + Clause parser.AnalyticClause + Result *View + Error string } -func testAnalyticFunction(t *testing.T, f func(*View, []parser.Expression, []partitionValue) error, tests []analyticFunctionTest) { +func testAnalyticFunction(t *testing.T, f func(*View, []parser.Expression, parser.AnalyticClause) error, tests []analyticFunctionTest) { for _, v := range tests { ViewCache.Clear() - err := f(v.View, v.Args, v.PartitionList) + err := f(v.View, v.Args, v.Clause) if err != nil { if len(v.Error) < 1 { t.Errorf("%s: unexpected error %q", v.Name, err) @@ -66,21 +66,13 @@ var rowNumberTests = []analyticFunctionTest{ }), }, }, - PartitionList: []partitionValue{ - { - orderValues: []parser.Primary{parser.NewInteger(1)}, - }, - { - orderValues: []parser.Primary{parser.NewInteger(2)}, - }, - { - orderValues: []parser.Primary{parser.NewInteger(3)}, - }, - { - orderValues: []parser.Primary{parser.NewInteger(4)}, - }, - { - orderValues: []parser.Primary{parser.NewInteger(5)}, + Clause: parser.AnalyticClause{ + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, }, }, Result: &View{ @@ -141,21 +133,18 @@ var rowNumberTests = []analyticFunctionTest{ }), }, }, - PartitionList: []partitionValue{ - { - values: []parser.Primary{parser.NewString("a")}, - }, - { - values: []parser.Primary{parser.NewString("a")}, - }, - { - values: []parser.Primary{parser.NewString("b")}, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "column1"}}, + }, }, - { - values: []parser.Primary{parser.NewString("b")}, - }, - { - values: []parser.Primary{parser.NewString("b")}, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, }, }, Result: &View{ @@ -207,7 +196,50 @@ var rowNumberTests = []analyticFunctionTest{ Args: []parser.Expression{ parser.NewInteger(1), }, - Error: "function ROW_NUMBER takes no argument", + Error: "analytic function ROW_NUMBER takes no argument", + }, + { + Name: "RowNumber Partition Value Error", + View: &View{ + Header: NewHeaderWithoutId("table1", []string{"column1", "column2"}), + Records: []Record{ + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(2), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(3), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(4), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(5), + }), + }, + }, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "notexist"}}, + }, + }, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, + }, + }, + Error: "field notexist does not exist", }, } @@ -243,26 +275,18 @@ var rankTests = []analyticFunctionTest{ }), }, }, - PartitionList: []partitionValue{ - { - values: []parser.Primary{parser.NewString("a")}, - orderValues: []parser.Primary{parser.NewInteger(1)}, - }, - { - values: []parser.Primary{parser.NewString("b")}, - orderValues: []parser.Primary{parser.NewInteger(1)}, - }, - { - values: []parser.Primary{parser.NewString("b")}, - orderValues: []parser.Primary{parser.NewInteger(1)}, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "column1"}}, + }, }, - { - values: []parser.Primary{parser.NewString("b")}, - orderValues: []parser.Primary{parser.NewInteger(2)}, - }, - { - values: []parser.Primary{parser.NewString("a")}, - orderValues: []parser.Primary{parser.NewInteger(2)}, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, }, }, Result: &View{ @@ -326,7 +350,93 @@ var rankTests = []analyticFunctionTest{ Args: []parser.Expression{ parser.NewInteger(1), }, - Error: "function RANK takes no argument", + Error: "analytic function RANK takes no argument", + }, + { + Name: "Rank Partition Value Error", + View: &View{ + Header: NewHeaderWithoutId("table1", []string{"column1", "column2"}), + Records: []Record{ + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(2), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(2), + }), + }, + }, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "notexist"}}, + }, + }, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, + }, + }, + Error: "field notexist does not exist", + }, + { + Name: "Rank Order Value Error", + View: &View{ + Header: NewHeaderWithoutId("table1", []string{"column1", "column2"}), + Records: []Record{ + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(2), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(2), + }), + }, + }, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "column1"}}, + }, + }, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "notexist"}}, + }, + }, + }, + }, + Error: "field notexist does not exist", }, } @@ -362,26 +472,18 @@ var denseRankTests = []analyticFunctionTest{ }), }, }, - PartitionList: []partitionValue{ - { - values: []parser.Primary{parser.NewString("a")}, - orderValues: []parser.Primary{parser.NewInteger(1)}, - }, - { - values: []parser.Primary{parser.NewString("b")}, - orderValues: []parser.Primary{parser.NewInteger(1)}, - }, - { - values: []parser.Primary{parser.NewString("b")}, - orderValues: []parser.Primary{parser.NewInteger(1)}, - }, - { - values: []parser.Primary{parser.NewString("b")}, - orderValues: []parser.Primary{parser.NewInteger(2)}, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "column1"}}, + }, }, - { - values: []parser.Primary{parser.NewString("a")}, - orderValues: []parser.Primary{parser.NewInteger(2)}, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, }, }, Result: &View{ @@ -445,7 +547,93 @@ var denseRankTests = []analyticFunctionTest{ Args: []parser.Expression{ parser.NewInteger(1), }, - Error: "function DENSE_RANK takes no argument", + Error: "analytic function DENSE_RANK takes no argument", + }, + { + Name: "DenseRank Partition Value Error", + View: &View{ + Header: NewHeaderWithoutId("table1", []string{"column1", "column2"}), + Records: []Record{ + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(2), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(2), + }), + }, + }, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "notexist"}}, + }, + }, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "column2"}}, + }, + }, + }, + }, + Error: "field notexist does not exist", + }, + { + Name: "DenseRank Order Value Error", + View: &View{ + Header: NewHeaderWithoutId("table1", []string{"column1", "column2"}), + Records: []Record{ + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(1), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("b"), + parser.NewInteger(2), + }), + NewRecordWithoutId([]parser.Primary{ + parser.NewString("a"), + parser.NewInteger(2), + }), + }, + }, + Clause: parser.AnalyticClause{ + Partition: parser.Partition{ + Values: []parser.Expression{ + parser.FieldReference{Column: parser.Identifier{Literal: "column1"}}, + }, + }, + OrderByClause: parser.OrderByClause{ + Items: []parser.Expression{ + parser.OrderItem{ + Value: parser.FieldReference{Column: parser.Identifier{Literal: "notexist"}}, + }, + }, + }, + }, + Error: "field notexist does not exist", }, } diff --git a/lib/query/view.go b/lib/query/view.go index 42b59b53..eec4c0f0 100644 --- a/lib/query/view.go +++ b/lib/query/view.go @@ -731,6 +731,8 @@ func (view *View) evalColumn(obj parser.Expression, column string, alias string) } func (view *View) evalAnalyticFunction(expr parser.AnalyticFunction) error { + DefineAnalyticFunctions() + name := strings.ToUpper(expr.Name) fn, ok := AnalyticFunctions[name] if !ok { @@ -748,28 +750,7 @@ func (view *View) evalAnalyticFunction(expr parser.AnalyticFunction) error { } } - partitionList := make([]partitionValue, view.RecordLen()) - - var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...) - for i := range view.Records { - filter[0].RecordIndex = i - values, err := filter.evalValues(expr.AnalyticClause.PartitionValues()) - if err != nil { - return err - } - - orderValues, err := filter.evalValues(expr.AnalyticClause.OrderValues()) - if err != nil { - return err - } - - partitionList[i] = partitionValue{ - values: values, - orderValues: orderValues, - } - } - - return fn(view, expr.Option.Args, partitionList) + return fn(view, expr.Option.Args, expr.AnalyticClause) } func (view *View) Offset(clause parser.OffsetClause) error {