Skip to content

Commit

Permalink
Merge branch 'supoprt_ansi_quotes' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
mithrandie committed May 19, 2019
2 parents b080151 + b2f772d commit 0d36f81
Show file tree
Hide file tree
Showing 33 changed files with 307 additions and 112 deletions.
5 changes: 4 additions & 1 deletion docs/_posts/2006-01-02-command.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ csvq >

Multiple formats can be specified for this option using a JSON array of strings.

--ansi-quotes, -k
: Use double quotation mark (U+0022 `"`) as identifier enclosure.

--wait-timeout value, -w value
: Limit of the waiting time in seconds to wait for locked files to be released. The default is 10.

Expand Down Expand Up @@ -516,7 +519,7 @@ In command parameters and statements, following strings represent special charac
| \r | U+000D Carriage Return |
| \t | U+0009 Horizontal Tab |
| \v | U+000b Vertical Tab |
| \\" | U+0022 Double Quote (in strings only) |
| \\" | U+0022 Double Quote |
| \\' | U+0027 Single Quote (in strings only) |
| \\\` | U+0060 Grave Accent (in identifiers only) |
| \\\\ | U+005c Backslash |
Expand Down
1 change: 1 addition & 0 deletions docs/_posts/2006-01-02-flag.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ A flag is a representation of a [command option]({{ '/reference/command.html#opt
| @@REPOSITORY | string | Directory path where files are located |
| @@TIMEZONE | string | Default TimeZone |
| @@DATETIME_FORMAT | string | Datetime Format to parse strings |
| @@ANSI_QUOTES | boolean | Use double quotation mark as identifier enclosure |
| @@WAIT_TIMEOUT | float | Limit of the waiting time in seconds to wait for locked files to be released |
| @@IMPORT_FORMAT | string | Default format to load files |
| @@DELIMITER | string | Field delimiter for CSV |
Expand Down
25 changes: 17 additions & 8 deletions docs/_posts/2006-01-02-statement.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,18 @@ csvq > EXIT;
You can use following types in statements.

Identifier
: A identifier is a word starting with any unicode letter or a Low Line(U+005F `_`) and followed by a character string that contains any unicode letters, any digits or Low Lines(U+005F `_`).
You cannot use [reserved words](#reserved_words) as a identifier.
: An identifier is a word starting with any unicode letter or a Low Line(U+005F `_`) and followed by a character string that contains any unicode letters, any digits or Low Lines(U+005F `_`).
You cannot use [reserved words](#reserved_words) as an identifier.

Notwithstanding above naming restriction, you can use most character strings as a identifier by enclosing in Grave Accents(U+0060 ` ).
Back quotes are escaped by back slashes.
Notwithstanding the above naming restriction, you can use most character strings as an identifier by enclosing them in Grave Accents(U+0060 \` ), or in Quotation Marks(U+0022 `"`) if [--ansi-quotes]({{ '/reference/command.html#options' | relative_url }}) is specified.
Enclosure characters are escaped with backslashes or by doubling the enclosure character.

Identifiers represent tables, columns, functions or cursors.
Identifiers are case-insensitive except for file paths; whether file paths are case-insensitive depends on your file system.

String
: A string is a character string enclosed in Apostrophes(U+0027 `'`) or Quotation Marks(U+0022 `"`).
In a string, single quotes or double quotes are escaped by back slashes.
: A string is a character string enclosed in Apostrophes(U+0027 `'`), or in Quotation Marks(U+0022 `"`) if [--ansi-quotes]({{ '/reference/command.html#options' | relative_url }}) is not specified.
In a string, enclosure characters are escaped with backslashes or by doubling the enclosure character.

Integer
: An integer is a word that contains only \[0-9\].
Expand Down Expand Up @@ -159,9 +159,10 @@ Runtime Information
```sql
abcde -- identifier
識別子 -- identifier
`ab+c\`de` -- identifier
`abc\`de` -- identifier
`abc``de` -- identifier
'abcd\'e' -- string
"abcd\"e" -- string
'abcd''e' -- string
123 -- integer
123.456 -- float
true -- ternary
Expand All @@ -172,6 +173,14 @@ null -- null
@%ENV_VAR -- environment variable
@%`ENV_VAR` -- environment variable
@#INFO -- runtime information

/* if --ansi-quotes is specified */
"abcd\"e" -- identifier
"abcd""e" -- identifier
/* if --ansi-quotes is not specified */
"abcd\"e" -- string
"abcd""e" -- string
```

## Comments
Expand Down
6 changes: 3 additions & 3 deletions docs/sitemap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
</url>
<url>
<loc>https://mithrandie.github.io/csvq/reference/command.html</loc>
<lastmod>2019-05-04T12:09:52+00:00</lastmod>
<lastmod>2019-05-19T09:32:34+00:00</lastmod>
</url>
<url>
<loc>https://mithrandie.github.io/csvq/reference/statement.html</loc>
<lastmod>2018-11-24T06:47:39+00:00</lastmod>
<lastmod>2019-05-19T09:32:34+00:00</lastmod>
</url>
<url>
<loc>https://mithrandie.github.io/csvq/reference/value.html</loc>
Expand Down Expand Up @@ -102,7 +102,7 @@
</url>
<url>
<loc>https://mithrandie.github.io/csvq/reference/flag.html</loc>
<lastmod>2018-11-17T22:33:20+00:00</lastmod>
<lastmod>2019-05-19T09:32:34+00:00</lastmod>
</url>
<url>
<loc>https://mithrandie.github.io/csvq/reference/environment-variable.html</loc>
Expand Down
2 changes: 1 addition & 1 deletion lib/action/calc.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func Calc(ctx context.Context, proc *query.Processor, expr string) error {
_ = proc.Tx.SetFlag(cmd.NoHeaderFlag, true)
q := "SELECT " + expr + " FROM STDIN"

program, _, err := parser.Parse(q, "", proc.Tx.Flags.DatetimeFormat, false)
program, _, err := parser.Parse(q, "", proc.Tx.Flags.DatetimeFormat, false, proc.Tx.Flags.AnsiQuotes)
if err != nil {
e := err.(*parser.SyntaxError)
e.SourceFile = ""
Expand Down
4 changes: 2 additions & 2 deletions lib/action/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Run(ctx context.Context, proc *query.Processor, input string, sourceFile st
showStats(ctx, proc, start)
}()

statements, _, err := parser.Parse(input, sourceFile, proc.Tx.Flags.DatetimeFormat, false)
statements, _, err := parser.Parse(input, sourceFile, proc.Tx.Flags.DatetimeFormat, false, proc.Tx.Flags.AnsiQuotes)
if err != nil {
return query.NewSyntaxError(err.(*parser.SyntaxError))
}
Expand Down Expand Up @@ -134,7 +134,7 @@ func LaunchInteractiveShell(ctx context.Context, proc *query.Processor) error {
proc.LogError(e.Error())
}

statements, _, e := parser.Parse(strings.Join(lines, "\n"), "", proc.Tx.Flags.DatetimeFormat, false)
statements, _, e := parser.Parse(strings.Join(lines, "\n"), "", proc.Tx.Flags.DatetimeFormat, false, proc.Tx.Flags.AnsiQuotes)
if e != nil {
if e = query.NewSyntaxError(e.(*parser.SyntaxError)); e != nil {
proc.LogError(e.Error())
Expand Down
12 changes: 8 additions & 4 deletions lib/cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
RepositoryFlag = "REPOSITORY"
TimezoneFlag = "TIMEZONE"
DatetimeFormatFlag = "DATETIME_FORMAT"
AnsiQuotesFlag = "ANSI_QUOTES"
WaitTimeoutFlag = "WAIT_TIMEOUT"
ImportFormatFlag = "IMPORT_FORMAT"
DelimiterFlag = "DELIMITER"
Expand Down Expand Up @@ -57,6 +58,7 @@ var FlagList = []string{
RepositoryFlag,
TimezoneFlag,
DatetimeFormatFlag,
AnsiQuotesFlag,
WaitTimeoutFlag,
ImportFormatFlag,
DelimiterFlag,
Expand Down Expand Up @@ -148,6 +150,7 @@ type Flags struct {
Repository string
Location string
DatetimeFormat []string
AnsiQuotes bool

// Must be updated from Transaction
WaitTimeout float64
Expand Down Expand Up @@ -210,6 +213,7 @@ func NewFlags(env *Environment) *Flags {
Repository: "",
Location: "Local",
DatetimeFormat: datetimeFormat,
AnsiQuotes: false,
WaitTimeout: 10,
Color: false,
ImportFormat: CSV,
Expand Down Expand Up @@ -295,6 +299,10 @@ func (f *Flags) SetDatetimeFormat(s string) {
}
}

// SetAnsiQuotes sets the AnsiQuotes flag to b. The flag is documented as
// making the double quotation mark (U+0022) an identifier enclosure.
func (f *Flags) SetAnsiQuotes(b bool) {
f.AnsiQuotes = b
}

func (f *Flags) SetWaitTimeout(t float64) {
if t < 0 {
t = 0
Expand Down Expand Up @@ -337,8 +345,6 @@ func (f *Flags) SetDelimiterPositions(s string) error {
if len(s) < 1 {
return nil
}
s = UnescapeString(s)

delimiterPositions, singleLine, err := ParseDelimiterPositions(s)
if err != nil {
return err
Expand Down Expand Up @@ -441,8 +447,6 @@ func (f *Flags) SetWriteDelimiterPositions(s string) error {
if len(s) < 1 {
return nil
}
s = UnescapeString(s)

delimiterPositions, singleLine, err := ParseDelimiterPositions(s)
if err != nil {
return errors.New(fmt.Sprintf("write-delimiter-positions must be %q or a JSON array of integers", DelimitAutomatically))
Expand Down
11 changes: 10 additions & 1 deletion lib/cmd/flags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ func TestFlags_SetDatetimeFormat(t *testing.T) {
}
}

// TestFlags_SetAnsiQuotes checks that SetAnsiQuotes(true) leaves the
// AnsiQuotes field of a freshly created Flags set to true.
func TestFlags_SetAnsiQuotes(t *testing.T) {
	const want = true

	f := NewFlags(nil)
	f.SetAnsiQuotes(want)

	if got := f.AnsiQuotes; got != want {
		t.Errorf("ansi_quotes = %t, expect to set %t", got, want)
	}
}

func TestFlags_SetWaitTimeout(t *testing.T) {
flags := NewFlags(nil)

Expand Down Expand Up @@ -574,7 +583,7 @@ func TestFlags_SetQuiet(t *testing.T) {

flags.SetQuiet(true)
if !flags.Quiet {
t.Errorf("silent = %t, expect to set %t", flags.Quiet, true)
t.Errorf("quiet = %t, expect to set %t", flags.Quiet, true)
}
}

Expand Down
52 changes: 37 additions & 15 deletions lib/cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ func EscapeString(s string) string {
buf.WriteString("\\t")
case '\v':
buf.WriteString("\\v")
case '"':
buf.WriteString("\\\"")
case '\'':
buf.WriteString("\\'")
case '\\':
Expand All @@ -46,12 +44,22 @@ func EscapeString(s string) string {
return buf.String()
}

func UnescapeString(s string) string {
func UnescapeString(s string, quote rune) string {
runes := []rune(s)
var buf bytes.Buffer

escaped := false
quoteRune := rune(0)
for _, r := range runes {
if 0 < quoteRune {
buf.WriteRune(quoteRune)
if r == quoteRune {
quoteRune = 0
continue
}
quoteRune = 0
}

if escaped {
switch r {
case 'a':
Expand All @@ -78,12 +86,14 @@ func UnescapeString(s string) string {
continue
}

if r == '\\' {
switch r {
case '\\':
escaped = true
continue
case quote:
quoteRune = r
default:
buf.WriteRune(r)
}

buf.WriteRune(r)
}
if escaped {
buf.WriteRune('\\')
Expand Down Expand Up @@ -123,12 +133,22 @@ func EscapeIdentifier(s string) string {
return buf.String()
}

func UnescapeIdentifier(s string) string {
func UnescapeIdentifier(s string, quote rune) string {
runes := []rune(s)
var buf bytes.Buffer

escaped := false
quoteRune := rune(0)
for _, r := range runes {
if 0 < quoteRune {
buf.WriteRune(quoteRune)
if r == quoteRune {
quoteRune = 0
continue
}
quoteRune = 0
}

if escaped {
switch r {
case 'a':
Expand All @@ -145,7 +165,7 @@ func UnescapeIdentifier(s string) string {
buf.WriteRune('\t')
case 'v':
buf.WriteRune('\v')
case '`', '\\':
case '"', '`', '\\':
buf.WriteRune(r)
default:
buf.WriteRune('\\')
Expand All @@ -155,12 +175,14 @@ func UnescapeIdentifier(s string) string {
continue
}

if r == '\\' {
switch r {
case '\\':
escaped = true
continue
case quote:
quoteRune = r
default:
buf.WriteRune(r)
}

buf.WriteRune(r)
}
if escaped {
buf.WriteRune('\\')
Expand Down Expand Up @@ -281,15 +303,15 @@ func ParseLineBreak(s string) (text.LineBreak, error) {
return lb, err
}
func ParseDelimiter(s string) (rune, error) {
r := []rune(UnescapeString(s))
r := []rune(UnescapeString(s, '\''))
if len(r) != 1 {
return 0, errors.New("delimiter must be one character")
}
return r[0], nil
}

func ParseDelimiterPositions(s string) ([]int, bool, error) {
s = UnescapeString(s)
s = UnescapeString(s, '\'')
var delimiterPositions []int = nil
singleLine := false

Expand Down
18 changes: 9 additions & 9 deletions lib/cmd/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@ import (

func TestEscapeString(t *testing.T) {
str := "fo\\o\a\b\f\n\r\t\v\\\\'\"bar\\"
expect := "fo\\\\o\\a\\b\\f\\n\\r\\t\\v\\\\\\\\\\'\\\"bar\\\\"
expect := "fo\\\\o\\a\\b\\f\\n\\r\\t\\v\\\\\\\\\\'\"bar\\\\"
unescaped := EscapeString(str)
if unescaped != expect {
t.Errorf("escaped string = %q, want %q", unescaped, expect)
}
}

func TestUnescapeString(t *testing.T) {
str := "fo\\o\\a\\b\\f\\n\\r\\t\\v\\\\\\\\'\\\"bar\\"
expect := "fo\\o\a\b\f\n\r\t\v\\\\'\"bar\\"
unescaped := UnescapeString(str)
str := "fo\\o\\a\\b\\f\\n\\r\\t\\v\\\\\\\\'\\\"bar''\"\"\\"
expect := "fo\\o\a\b\f\n\r\t\v\\\\'\"bar'\"\"\\"
unescaped := UnescapeString(str, '\'')
if unescaped != expect {
t.Errorf("unescaped string = %q, want %q", unescaped, expect)
}
Expand All @@ -35,9 +35,9 @@ func TestEscapeIdentifier(t *testing.T) {
}

func TestUnescapeIdentifier(t *testing.T) {
str := "fo\\o\\a\\b\\f\\n\\r\\t\\v\\\\\\\\`bar\\"
expect := "fo\\o\a\b\f\n\r\t\v\\\\`bar\\"
unescaped := UnescapeIdentifier(str)
str := "fo\\o\\a\\b\\f\\n\\r\\t\\v\\\\\\\\`bar``\\"
expect := "fo\\o\a\b\f\n\r\t\v\\\\`bar`\\"
unescaped := UnescapeIdentifier(str, '`')
if unescaped != expect {
t.Errorf("unescaped identifier = %q, want %q", unescaped, expect)
}
Expand Down Expand Up @@ -317,12 +317,12 @@ var unescapeStringBenchString2 = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrst

func BenchmarkUnescapeString(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = UnescapeString(unescapeStringBenchString)
_ = UnescapeString(unescapeStringBenchString, '\'')
}
}

func BenchmarkUnescapeString2(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = UnescapeString(unescapeStringBenchString2)
_ = UnescapeString(unescapeStringBenchString2, '\'')
}
}
Loading

0 comments on commit 0d36f81

Please sign in to comment.