From 77f2b2cd036d4c8572f4042ec53dfbc66a161d64 Mon Sep 17 00:00:00 2001
From: "aleksei.p"
Date: Thu, 5 Dec 2024 01:42:16 +0100
Subject: [PATCH] update

---
 src/dialect/mod.rs          |  9 ++++++
 src/dialect/redshift.rs     | 22 +++++++++++++-
 src/tokenizer.rs            | 58 ++++++++++++++++++++++++++++++-------
 tests/sqlparser_redshift.rs |  3 ++
 4 files changed, 80 insertions(+), 12 deletions(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index a8993e685..a43cbbc51 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -138,6 +138,15 @@ pub trait Dialect: Debug + Any {
         true
     }
 
+    /// Determine if a nested quote start is present and return it
+    fn nested_quote_start(
+        &self,
+        _quote_start: char,
+        mut _chars: Peekable<Chars<'_>>,
+    ) -> Option<char> {
+        None
+    }
+
     /// Determine if a character is a valid start character for an unquoted identifier
     fn is_identifier_start(&self, ch: char) -> bool;
 
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs
index 764e043a6..8dc83bc69 100644
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@@ -45,7 +45,7 @@ impl Dialect for RedshiftSqlDialect {
         // a start character and a quote is a beginning of quoted identifier.
         // Skipping analyzing token such as `"a"` and analyze only token that
         // can be part of json path potentially.
-        // For ex., `[0]`, `['a']` (seems part of json path) or `["a"]` (normal quoted identifier)
+        // For ex., `[0]` (seems part of json path) or `["a"]` (normal quoted identifier)
         if let Some(quote_start) = chars.peek() {
             if *quote_start == '"' {
                 return true;
@@ -63,6 +63,26 @@ impl Dialect for RedshiftSqlDialect {
         false
     }
 
+    /// RedShift supports nested quoted identifiers like `["a"]`.
+    /// Determine if a nested quote has started and return it.
+    fn nested_quote_start(
+        &self,
+        quote_start: char,
+        mut chars: Peekable<Chars<'_>>,
+    ) -> Option<char> {
+        if quote_start != '[' {
+            return None;
+        }
+
+        chars.next(); // skip opening quote start
+
+        if chars.skip_while(|ch| ch.is_whitespace()).peekable().peek() == Some(&'"') {
+            Some('"')
+        } else {
+            None
+        }
+    }
+
     fn is_identifier_start(&self, ch: char) -> bool {
         // Extends Postgres dialect with sharp
         PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index aacfc16fa..a00db81be 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1081,19 +1081,34 @@ impl<'a> Tokenizer<'a> {
                     .dialect
                     .is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
             {
-                let error_loc = chars.location();
-                chars.next(); // consume the opening quote
-                let quote_end = Word::matching_end_quote(quote_start);
-                let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+                let word = if let Some(nested_quote_start) = self
+                    .dialect
+                    .nested_quote_start(quote_start, chars.peekable.clone())
+                {
+                    chars.next(); // consume the opening quote
+
+                    let quote_end = Word::matching_end_quote(quote_start);
+                    let error_loc = chars.location();
+
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+                    let nested_word =
+                        self.tokenize_quoted_identifier(nested_quote_start, chars)?;
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+
+                    if chars.peek() != Some(&quote_end) {
+                        return self.tokenizer_error(
+                            error_loc,
+                            format!("Expected close delimiter '{quote_end}' before EOF."),
+                        );
+                    }
+
+                    chars.next(); // consume the closing nested quote
 
-                if last_char == Some(quote_end) {
-                    Ok(Some(Token::make_word(&s, Some(quote_start))))
+                    format!("{nested_quote_start}{nested_word}{nested_quote_start}")
                 } else {
-                    self.tokenizer_error(
-                        error_loc,
-                        format!("Expected close delimiter '{quote_end}' before EOF."),
-                    )
-                }
+                    self.tokenize_quoted_identifier(quote_start, chars)?
+                };
+                Ok(Some(Token::make_word(&word, Some(quote_start))))
             }
             // numbers and period
             '0'..='9' | '.' => {
@@ -1597,6 +1612,27 @@ impl<'a> Tokenizer<'a> {
         s
     }
 
+    /// Tokenize an identifier or keyword, after the first char is already consumed.
+    fn tokenize_quoted_identifier(
+        &self,
+        quote_start: char,
+        chars: &mut State,
+    ) -> Result<String, TokenizerError> {
+        let error_loc = chars.location();
+        chars.next(); // consume the opening quote
+        let quote_end = Word::matching_end_quote(quote_start);
+        let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+
+        if last_char == Some(quote_end) {
+            Ok(s)
+        } else {
+            self.tokenizer_error(
+                error_loc,
+                format!("Expected close delimiter '{quote_end}' before EOF."),
+            )
+        }
+    }
+
     /// Read a single quoted string, starting with the opening quote.
     fn tokenize_escaped_single_quoted_string(
         &self,
diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs
index 63cf0b13b..05090ac58 100644
--- a/tests/sqlparser_redshift.rs
+++ b/tests/sqlparser_redshift.rs
@@ -384,6 +384,9 @@ fn test_parse_select_numbered_columns() {
     redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
     // RedShift specific case - quoted identifier inside square bracket
     redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
 }
 
 #[test]