Merge branch 'develop'

mithrandie · Jul 4, 2017 · ca0f38c · ca0f38c
2 parents 25bcfe6 + 6ab90de
commit ca0f38c
Show file tree

Hide file tree

Showing 7 changed files with 357 additions and 144 deletions.
diff --git a/docs/_posts/2006-01-02-analytic-functions.md b/docs/_posts/2006-01-02-analytic-functions.md
@@ -36,7 +36,7 @@ _order_by_clause_
 : [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }})
 
 Analytic Functions sort the result set by _order_by_clause_ and calculate values within each of groups partitioned by _partition_clause_.
-If there is no _partition_clause_, then all records of result set are dealt with as one group. 
+If there is no _partition_clause_, then all records of the result set are dealt with as one group. 
 
 ## Definitions
 
@@ -69,7 +69,7 @@ _partition_clause_
 _order_by_clause_
 : [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }})
 
-Returns ranks of records in a group.
+Return ranks of records in a group.
 
 
 ### DENSE_RANK
@@ -85,5 +85,5 @@ _partition_clause_
 _order_by_clause_
 : [Order By Clause]({{ '/reference/select_query.html#order_by_clause' | relative_url }})
 
-Returns ranks of records without any gaps in the ranking in a group.
+Return ranks of records without any gaps in the ranking in a group.
 
diff --git a/docs/_posts/2006-01-02-command.md b/docs/_posts/2006-01-02-command.md
@@ -51,11 +51,11 @@ csvq [global options] [subcommand [subcommand options]] ["query"|"statements"]
 --version, -v
 : Print the version
 
-> First line of a CSV file is dealt with as header line. In case "--no-header" option passed, 
+> The first line of a CSV file is treated as the header line. If the "--no-header" option is passed, 
 > fields are automatically named "c" followed by a sequential number (e.g. "c1", "c2", "c3", ...).
 
-> In most cases CSV fields are imported as string value, but no-quoted empty fields are imported as null.
-> By using "--without-null" option, no-quoted empty fields are imported as empty string value.
+> In most cases CSV fields are imported as string values, but unquoted empty fields are imported as null.
+> With the "--without-null" option, unquoted empty fields are imported as empty string values.
 
 > Some of global options can be specified in statements by using [Set Flag Statements]({{ '/reference/flag.html' | relative_url }}).
 

diff --git a/docs/_posts/2006-01-02-set-operators.md b/docs/_posts/2006-01-02-set-operators.md
@@ -13,7 +13,7 @@ category: reference
 | [INTERSECT](#intersect) | Return the intersection of result sets |
 
 A set operation combines result sets retrieved by select queries into a single result set.
-If the ALL keyword is specified, the result is distinguished.
+If the ALL keyword is not specified, duplicate records are removed from the result.
 
 ## UNION
 {: #union}

diff --git a/docs/_posts/2006-01-02-statement.md b/docs/_posts/2006-01-02-statement.md
@@ -20,7 +20,7 @@ A statements is terminated with a semicolon.
 Statements are processed sequentially for each statement.
 In statements, character case is ignored.
 
-If you want to execute a sigle query, you can omit a terminal semicolon.  
+If you want to execute a single query, you can omit the terminal semicolon.  
 
 ```bash
 # Execute a single query

diff --git a/lib/query/analytic_function.go b/lib/query/analytic_function.go
@@ -2,26 +2,33 @@ package query
 
 import (
 	"errors"
+	"sync"
 
 	"github.com/mithrandie/csvq/lib/parser"
 	"github.com/mithrandie/csvq/lib/ternary"
 )
 
-var AnalyticFunctions = map[string]func(*View, []parser.Expression, []partitionValue) error{
-	"ROW_NUMBER": RowNumber,
-	"RANK":       Rank,
-	"DENSE_RANK": DenseRank,
+var AnalyticFunctions map[string]func(*View, []parser.Expression, parser.AnalyticClause) error
+var defineAnalyticFunctions sync.Once
+
+func DefineAnalyticFunctions() {
+	defineAnalyticFunctions.Do(func() {
+		AnalyticFunctions = map[string]func(*View, []parser.Expression, parser.AnalyticClause) error{
+			"ROW_NUMBER": RowNumber,
+			"RANK":       Rank,
+			"DENSE_RANK": DenseRank,
+		}
+	})
 }
 
 type partitionValue struct {
-	values      []parser.Primary
-	orderValues []parser.Primary
-	number      float64
-	rank        float64
+	partitionValues []parser.Primary
+	orderValues     []parser.Primary
+	values          map[string]float64
 }
 
 func (pv partitionValue) match(values []parser.Primary) bool {
-	for i, v := range pv.values {
+	for i, v := range pv.partitionValues {
 		if EquivalentTo(v, values[i]) != ternary.TRUE {
 			return false
 		}
@@ -49,84 +56,121 @@ func (pv partitionValues) searchIndex(values []parser.Primary) int {
 	return -1
 }
 
-func RowNumber(view *View, args []parser.Expression, partitinList []partitionValue) error {
+func RowNumber(view *View, args []parser.Expression, clause parser.AnalyticClause) error {
 	if args != nil {
-		return errors.New("function ROW_NUMBER takes no argument")
+		return errors.New("analytic function ROW_NUMBER takes no argument")
 	}
 
 	partitions := partitionValues{}
 
+	var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...)
 	for i := range view.Records {
+		filter[0].RecordIndex = i
+		partitionValues, err := filter.evalValues(clause.PartitionValues())
+		if err != nil {
+			return err
+		}
+
 		var idx int
-		if idx = partitions.searchIndex(partitinList[i].values); -1 < idx {
-			partitions[idx].number++
+		if idx = partitions.searchIndex(partitionValues); -1 < idx {
+			partitions[idx].values["number"]++
 		} else {
 			partitions = append(partitions, partitionValue{
-				values: partitinList[i].values,
-				number: 1,
+				partitionValues: partitionValues,
+				values: map[string]float64{
+					"number": 1,
+				},
 			})
 			idx = len(partitions) - 1
 		}
 
-		view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].number))))
+		view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["number"]))))
 	}
 
 	return nil
 }
 
-func Rank(view *View, args []parser.Expression, partitinList []partitionValue) error {
+func Rank(view *View, args []parser.Expression, clause parser.AnalyticClause) error {
 	if args != nil {
-		return errors.New("function RANK takes no argument")
+		return errors.New("analytic function RANK takes no argument")
 	}
 
 	partitions := partitionValues{}
 
+	var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...)
 	for i := range view.Records {
+		filter[0].RecordIndex = i
+		partitionValues, err := filter.evalValues(clause.PartitionValues())
+		if err != nil {
+			return err
+		}
+
+		orderValues, err := filter.evalValues(clause.OrderValues())
+		if err != nil {
+			return err
+		}
+
 		var idx int
-		if idx = partitions.searchIndex(partitinList[i].values); -1 < idx {
-			partitions[idx].number++
-			if !partitions[idx].isSameRank(partitinList[i].orderValues) {
-				partitions[idx].rank = partitions[idx].number
+		if idx = partitions.searchIndex(partitionValues); -1 < idx {
+			partitions[idx].values["number"]++
+			if !partitions[idx].isSameRank(orderValues) {
+				partitions[idx].values["rank"] = partitions[idx].values["number"]
 			}
 		} else {
 			partitions = append(partitions, partitionValue{
-				values:      partitinList[i].values,
-				orderValues: partitinList[i].orderValues,
-				number:      1,
-				rank:        1,
+				partitionValues: partitionValues,
+				orderValues:     orderValues,
+				values: map[string]float64{
+					"number": 1,
+					"rank":   1,
+				},
 			})
 			idx = len(partitions) - 1
 		}
 
-		view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].rank))))
+		view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["rank"]))))
 	}
 
 	return nil
 }
 
-func DenseRank(view *View, args []parser.Expression, partitinList []partitionValue) error {
+func DenseRank(view *View, args []parser.Expression, clause parser.AnalyticClause) error {
 	if args != nil {
-		return errors.New("function DENSE_RANK takes no argument")
+		return errors.New("analytic function DENSE_RANK takes no argument")
 	}
 
 	partitions := partitionValues{}
 
+	var filter Filter = append([]FilterRecord{{View: view, RecordIndex: 0}}, view.parentFilter...)
 	for i := range view.Records {
+		filter[0].RecordIndex = i
+		partitionValues, err := filter.evalValues(clause.PartitionValues())
+		if err != nil {
+			return err
+		}
+
+		orderValues, err := filter.evalValues(clause.OrderValues())
+		if err != nil {
+			return err
+		}
+
 		var idx int
-		if idx = partitions.searchIndex(partitinList[i].values); -1 < idx {
-			if !partitions[idx].isSameRank(partitinList[i].orderValues) {
-				partitions[idx].rank++
+		if idx = partitions.searchIndex(partitionValues); -1 < idx {
+			if !partitions[idx].isSameRank(orderValues) {
+				partitions[idx].values["rank"]++
 			}
 		} else {
 			partitions = append(partitions, partitionValue{
-				values:      partitinList[i].values,
-				orderValues: partitinList[i].orderValues,
-				rank:        1,
+				partitionValues: partitionValues,
+				orderValues:     orderValues,
+				values: map[string]float64{
+					"rank": 1,
+				},
 			})
 			idx = len(partitions) - 1
 		}
 
-		view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].rank))))
+		view.Records[i] = append(view.Records[i], NewCell(parser.NewInteger(int64(partitions[idx].values["rank"]))))
 	}
 
 	return nil