From 77f2b2cd036d4c8572f4042ec53dfbc66a161d64 Mon Sep 17 00:00:00 2001
From: "aleksei.p"
Date: Thu, 5 Dec 2024 01:42:16 +0100
Subject: [PATCH] update

---
 src/dialect/mod.rs          |  9 ++++++
 src/dialect/redshift.rs     | 22 +++++++++++++-
 src/tokenizer.rs            | 58 ++++++++++++++++++++++++++++++-------
 tests/sqlparser_redshift.rs |  3 ++
 4 files changed, 80 insertions(+), 12 deletions(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index a8993e685..a43cbbc51 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -138,6 +138,15 @@ pub trait Dialect: Debug + Any {
         true
     }
 
+    /// Determine if a nested quote start is present and return it
+    fn nested_quote_start(
+        &self,
+        _quote_start: char,
+        mut _chars: Peekable<Chars<'_>>,
+    ) -> Option<char> {
+        None
+    }
+
     /// Determine if a character is a valid start character for an unquoted identifier
     fn is_identifier_start(&self, ch: char) -> bool;
 
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs
index 764e043a6..8dc83bc69 100644
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@@ -45,7 +45,7 @@ impl Dialect for RedshiftSqlDialect {
         // a start character and a quote is a beginning of quoted identifier.
         // Skipping analyzing token such as `"a"` and analyze only token that
         // can be part of json path potentially.
-        // For ex., `[0]`, `['a']` (seems part of json path) or `["a"]` (normal quoted identifier)
+        // For ex., `[0]` (seems part of json path) or `["a"]` (normal quoted identifier)
         if let Some(quote_start) = chars.peek() {
             if *quote_start == '"' {
                 return true;
@@ -63,6 +63,26 @@ impl Dialect for RedshiftSqlDialect {
         false
     }
 
+    /// RedShift supports nested quoted identifiers like `["a"]`.
+    /// Determine if a nested quote has started and return it.
+    fn nested_quote_start(
+        &self,
+        quote_start: char,
+        mut chars: Peekable<Chars<'_>>,
+    ) -> Option<char> {
+        if quote_start != '[' {
+            return None;
+        }
+
+        chars.next(); // skip opening quote start
+
+        if chars.skip_while(|ch| ch.is_whitespace()).peekable().peek() == Some(&'"') {
+            Some('"')
+        } else {
+            None
+        }
+    }
+
     fn is_identifier_start(&self, ch: char) -> bool {
         // Extends Postgres dialect with sharp
         PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index aacfc16fa..a00db81be 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1081,19 +1081,34 @@ impl<'a> Tokenizer<'a> {
                     .dialect
                     .is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
             {
-                let error_loc = chars.location();
-                chars.next(); // consume the opening quote
-                let quote_end = Word::matching_end_quote(quote_start);
-                let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+                let word = if let Some(nested_quote_start) = self
+                    .dialect
+                    .nested_quote_start(quote_start, chars.peekable.clone())
+                {
+                    chars.next(); // consume the opening quote
+
+                    let quote_end = Word::matching_end_quote(quote_start);
+                    let error_loc = chars.location();
+
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+                    let nested_word =
+                        self.tokenize_quoted_identifier(nested_quote_start, chars)?;
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+
+                    if chars.peek() != Some(&quote_end) {
+                        return self.tokenizer_error(
+                            error_loc,
+                            format!("Expected close delimiter '{quote_end}' before EOF."),
+                        );
+                    }
+
+                    chars.next(); // consume the closing nested quote
 
-                if last_char == Some(quote_end) {
-                    Ok(Some(Token::make_word(&s, Some(quote_start))))
+                    format!("{nested_quote_start}{nested_word}{nested_quote_start}")
                 } else {
-                    self.tokenizer_error(
-                        error_loc,
-                        format!("Expected close delimiter '{quote_end}' before EOF."),
-                    )
-                }
+                    self.tokenize_quoted_identifier(quote_start, chars)?
+                };
+                Ok(Some(Token::make_word(&word, Some(quote_start))))
             }
             // numbers and period
             '0'..='9' | '.' => {
@@ -1597,6 +1612,27 @@ impl<'a> Tokenizer<'a> {
         s
     }
 
+    /// Tokenize an identifier or keyword, after the first char is already consumed.
+    fn tokenize_quoted_identifier(
+        &self,
+        quote_start: char,
+        chars: &mut State,
+    ) -> Result<String, TokenizerError> {
+        let error_loc = chars.location();
+        chars.next(); // consume the opening quote
+        let quote_end = Word::matching_end_quote(quote_start);
+        let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+
+        if last_char == Some(quote_end) {
+            Ok(s)
+        } else {
+            self.tokenizer_error(
+                error_loc,
+                format!("Expected close delimiter '{quote_end}' before EOF."),
+            )
+        }
+    }
+
     /// Read a single quoted string, starting with the opening quote.
     fn tokenize_escaped_single_quoted_string(
         &self,
diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs
index 63cf0b13b..05090ac58 100644
--- a/tests/sqlparser_redshift.rs
+++ b/tests/sqlparser_redshift.rs
@@ -384,6 +384,9 @@ fn test_parse_select_numbered_columns() {
     redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
     // RedShift specific case - quoted identifier inside square bracket
     redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
 }
 
 #[test]