Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
mithrandie committed Jul 4, 2017
2 parents 25bcfe6 + 6ab90de commit ca0f38c
Show file tree
Hide file tree
Showing 7 changed files with 357 additions and 144 deletions.
6 changes: 3 additions & 3 deletions docs/_posts/2006-01-02-analytic-functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ _order_by_clause_
: [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }})

Analytic Functions sort the result set by _order_by_clause_ and calculate values within each of the groups partitioned by _partition_clause_.
If there is no _partition_clause_, then all records of result set are dealt with as one group.
If there is no _partition_clause_, then all records of the result set are dealt with as one group.

## Definitions

Expand Down Expand Up @@ -69,7 +69,7 @@ _partition_clause_
_order_by_clause_
: [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }})

Returns ranks of records in a group.
Return ranks of records in a group.


### DENSE_RANK
Expand All @@ -85,5 +85,5 @@ _partition_clause_
_order_by_clause_
: [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }})

Returns ranks of records without any gaps in the ranking in a group.
Return ranks of records without any gaps in the ranking in a group.

6 changes: 3 additions & 3 deletions docs/_posts/2006-01-02-command.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ csvq [global options] [subcommand [subcommand options]] ["query"|"statements"]
--version, -v
: Print the version
> First line of a CSV file is dealt with as header line. In case "--no-header" option passed,
> The first line of a CSV file is dealt with as the header line. If the "--no-header" option is passed,
> fields are automatically named "c" followed by a sequential number (e.g. "c1", "c2", "c3", ...).
> In most cases CSV fields are imported as string value, but no-quoted empty fields are imported as null.
> By using "--without-null" option, no-quoted empty fields are imported as empty string value.
> In most cases CSV fields are imported as string values, but unquoted empty fields are imported as null.
> By using the "--without-null" option, unquoted empty fields are imported as empty string values.
> Some of the global options can be specified in statements by using [Set Flag Statements]({{ '/reference/flag.html' | relative_url }}).
Expand Down
2 changes: 1 addition & 1 deletion docs/_posts/2006-01-02-set-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ category: reference
| [INTERSECT](#intersect) | Return the intersection of result sets |

A set operation combines result sets retrieved by select queries into a single result set.
If the ALL keyword is specified, the result is distinguished.
If the ALL keyword is not specified, duplicate records are removed from the result.

## UNION
{: #union}
Expand Down
2 changes: 1 addition & 1 deletion docs/_posts/2006-01-02-statement.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ A statement is terminated with a semicolon.
Statements are processed sequentially, one statement at a time.
In statements, character case is ignored.

If you want to execute a sigle query, you can omit a terminal semicolon.
If you want to execute a single query, you can omit the terminal semicolon.

```bash
# Execute a single query
Expand Down
116 changes: 80 additions & 36 deletions lib/query/analytic_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,33 @@ package query

import (
"errors"
"sync"

"github.com/mithrandie/csvq/lib/parser"
"github.com/mithrandie/csvq/lib/ternary"
)

var AnalyticFunctions = map[string]func(*View, []parser.Expression, []partitionValue) error{
"ROW_NUMBER": RowNumber,
"RANK": Rank,
"DENSE_RANK": DenseRank,
// AnalyticFunctions maps an analytic function name (for example "ROW_NUMBER")
// to the function that implements it. It is populated by DefineAnalyticFunctions.
var AnalyticFunctions map[string]func(*View, []parser.Expression, parser.AnalyticClause) error
// defineAnalyticFunctions is the sync.Once that DefineAnalyticFunctions uses
// so the assignment of AnalyticFunctions is performed only once.
var defineAnalyticFunctions sync.Once

func DefineAnalyticFunctions() {
defineAnalyticFunctions.Do(func() {
AnalyticFunctions = map[string]func(*View, []parser.Expression, parser.AnalyticClause) error{
"ROW_NUMBER": RowNumber,
"RANK": Rank,
"DENSE_RANK": DenseRank,
}
})
}

// partitionValue is the running state that the analytic functions in this
// file keep for a single partition while walking the records of a view.
type partitionValue struct {
values []parser.Primary
orderValues []parser.Primary
number float64
rank float64
// partitionValues identifies the partition; match compares it element by
// element against the values evaluated for a record.
partitionValues []parser.Primary
// orderValues is set from the record that creates the partition entry.
// NOTE(review): presumably what isSameRank compares against — confirm.
orderValues []parser.Primary
// values holds named counters; the functions in this file use the keys
// "number" and "rank".
values map[string]float64
}

func (pv partitionValue) match(values []parser.Primary) bool {
for i, v := range pv.values {
for i, v := range pv.partitionValues {
if EquivalentTo(v, values[i]) != ternary.TRUE {
return false
}
Expand Down Expand Up @@ -49,84 +56,121 @@ func (pv partitionValues) searchIndex(values []parser.Primary) int {
return -1
}

// RowNumber appends one cell to every record of view. The cell holds the
// record's 1-based sequence number within its partition, counted in the order
// in which view.Records is iterated.
//
// It returns an error when args is non-nil, or when the partition values of a
// record cannot be evaluated.
func RowNumber(view *View, args []parser.Expression, partitinList []partitionValue) error {
func RowNumber(view *View, args []parser.Expression, clause parser.AnalyticClause) error {
if args != nil {
return errors.New("function ROW_NUMBER takes no argument")
return errors.New("analytic function ROW_NUMBER takes no argument")
}

// One entry per distinct partition seen so far.
partitions := partitionValues{}

// The first filter record refers to view itself; its RecordIndex is moved to
// the current record on every iteration. The parent filter records follow it.
var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...)
for i := range view.Records {
filter[0].RecordIndex = i
partitionValues, err := filter.evalValues(clause.PartitionValues())
if err != nil {
return err
}

// searchIndex returns -1 when no existing partition matches, in which case a
// new partition starting at number 1 is appended.
var idx int
if idx = partitions.searchIndex(partitinList[i].values); -1 < idx {
partitions[idx].number++
if idx = partitions.searchIndex(partitionValues); -1 < idx {
partitions[idx].values["number"]++
} else {
partitions = append(partitions, partitionValue{
values: partitinList[i].values,
number: 1,
partitionValues: partitionValues,
values: map[string]float64{
"number": 1,
},
})
idx = len(partitions) - 1
}

// The counter is kept as float64 and converted to an integer cell here.
view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].number))))
view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["number"]))))
}

return nil
}

// Rank appends one cell to every record of view, holding the record's rank
// within its partition.
//
// For each partition a position counter ("number") is advanced on every
// record. When isSameRank reports false for the record's order values, the
// rank is set to the current position, so the ranking can contain gaps;
// otherwise the previous rank is reused.
//
// It returns an error when args is non-nil, or when the partition or order
// values of a record cannot be evaluated.
func Rank(view *View, args []parser.Expression, partitinList []partitionValue) error {
func Rank(view *View, args []parser.Expression, clause parser.AnalyticClause) error {
if args != nil {
return errors.New("function RANK takes no argument")
return errors.New("analytic function RANK takes no argument")
}

// One entry per distinct partition seen so far.
partitions := partitionValues{}

// The first filter record refers to view itself; its RecordIndex is moved to
// the current record on every iteration. The parent filter records follow it.
var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...)
for i := range view.Records {
filter[0].RecordIndex = i
partitionValues, err := filter.evalValues(clause.PartitionValues())
if err != nil {
return err
}

orderValues, err := filter.evalValues(clause.OrderValues())
if err != nil {
return err
}

// searchIndex returns -1 when no existing partition matches, in which case a
// new partition with number 1 and rank 1 is appended.
var idx int
if idx = partitions.searchIndex(partitinList[i].values); -1 < idx {
partitions[idx].number++
if !partitions[idx].isSameRank(partitinList[i].orderValues) {
partitions[idx].rank = partitions[idx].number
if idx = partitions.searchIndex(partitionValues); -1 < idx {
partitions[idx].values["number"]++
// NOTE(review): the stored orderValues of the partition entry is not
// reassigned in this function; confirm how isSameRank tracks the previous
// record's order values.
if !partitions[idx].isSameRank(orderValues) {
partitions[idx].values["rank"] = partitions[idx].values["number"]
}
} else {
partitions = append(partitions, partitionValue{
values: partitinList[i].values,
orderValues: partitinList[i].orderValues,
number: 1,
rank: 1,
partitionValues: partitionValues,
orderValues: orderValues,
values: map[string]float64{
"number": 1,
"rank": 1,
},
})
idx = len(partitions) - 1
}

// The rank is kept as float64 and converted to an integer cell here.
view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].rank))))
view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["rank"]))))
}

return nil
}

func DenseRank(view *View, args []parser.Expression, partitinList []partitionValue) error {
func DenseRank(view *View, args []parser.Expression, clause parser.AnalyticClause) error {
if args != nil {
return errors.New("function DENSE_RANK takes no argument")
return errors.New("analytic function DENSE_RANK takes no argument")
}

partitions := partitionValues{}

var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...)
for i := range view.Records {
filter[0].RecordIndex = i
partitionValues, err := filter.evalValues(clause.PartitionValues())
if err != nil {
return err
}

orderValues, err := filter.evalValues(clause.OrderValues())
if err != nil {
return err
}

var idx int
if idx = partitions.searchIndex(partitinList[i].values); -1 < idx {
if !partitions[idx].isSameRank(partitinList[i].orderValues) {
partitions[idx].rank++
if idx = partitions.searchIndex(partitionValues); -1 < idx {
if !partitions[idx].isSameRank(orderValues) {
partitions[idx].values["rank"]++
}
} else {
partitions = append(partitions, partitionValue{
values: partitinList[i].values,
orderValues: partitinList[i].orderValues,
rank: 1,
partitionValues: partitionValues,
orderValues: orderValues,
values: map[string]float64{
"rank": 1,
},
})
idx = len(partitions) - 1
}

view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].rank))))
view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["rank"]))))
}

return nil
Expand Down
Loading

0 comments on commit ca0f38c

Please sign in to comment.