diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index a43cbbc51..437958885 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -133,17 +133,11 @@ pub trait Dialect: Debug + Any { None } - /// Determine if quoted characters are proper for identifier - fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool { - true - } - - /// Determine if nested quote start is presented and return it - fn nested_quote_start( + /// Determine if specially quoted characters are present + fn special_delimited_identifier_start( &self, - _quote_start: char, mut _chars: Peekable<Chars<'_>>, - ) -> Option<char> { + ) -> Option<(char, Option<char>)> { None } @@ -865,11 +859,11 @@ mod tests { self.0.supports_string_literal_backslash_escape() } - fn is_proper_identifier_inside_quotes( + fn special_delimited_identifier_start( &self, chars: std::iter::Peekable<std::str::Chars<'_>>, - ) -> bool { - self.0.is_proper_identifier_inside_quotes(chars) + ) -> Option<(char, Option<char>)> { + self.0.special_delimited_identifier_start(chars) } fn supports_filter_during_aggregation(&self) -> bool { diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 8dc83bc69..27c1b0e1d 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -32,55 +32,32 @@ pub struct RedshiftSqlDialect {} // in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will // be a json path impl Dialect for RedshiftSqlDialect { - fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '"' || ch == '[' - } - - /// Determine if quoted characters are proper for identifier + /// Determine if the quoted characters look like the special case of quotation beginning with `[`. /// It's needed to distinguish treating square brackets as quotes from /// treating them as json path. If there is identifier then we assume /// there is no json path. 
- fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool { - // PartiQL (used as json path query language in Redshift) uses square bracket as - // a start character and a quote is a beginning of quoted identifier. - // Skipping analyzing token such as `"a"` and analyze only token that - // can be part of json path potentially. - // For ex., `[0]` (seems part of json path) or `["a"]` (normal quoted identifier) - if let Some(quote_start) = chars.peek() { - if *quote_start == '"' { - return true; - } - }; - chars.next(); - let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); - if let Some(&ch) = not_white_chars.peek() { - // PartiQL uses single quote as starting identification inside a quote - // It is a normal identifier if it has no single quote at the beginning. - // Square bracket can contain quoted identifier. - // For ex., `["a"]`, but this is not a part of json path, and it is a normal quoted identifier. - return ch == '"' || self.is_identifier_start(ch); - } - false - } - - /// RedShift support nested quoted identifier like `["a"]`. - /// Determine if nested quote started and return it. 
- fn nested_quote_start( + fn special_delimited_identifier_start( &self, - quote_start: char, mut chars: Peekable<Chars<'_>>, - ) -> Option<char> { - if quote_start != '[' { + ) -> Option<(char, Option<char>)> { + if chars.peek() != Some(&'[') { return None; } - chars.next(); // skip opening quote start + chars.next(); + + let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); - if chars.skip_while(|ch| ch.is_whitespace()).peekable().peek() == Some(&'"') { - Some('"') - } else { - None + if let Some(&ch) = not_white_chars.peek() { + if ch == '"' { + return Some(('[', Some('"'))); + } + if self.is_identifier_start(ch) { + return Some(('[', None)); + } } + + None } fn is_identifier_start(&self, ch: char) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 59fefe531..762d62214 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1075,40 +1075,41 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::DoubleQuotedString(s))) } // delimited (quoted) identifier - quote_start - if self.dialect.is_delimited_identifier_start(ch) - && self - .dialect - .is_proper_identifier_inside_quotes(chars.peekable.clone()) => + quote_start if self.dialect.is_delimited_identifier_start(ch) => { + let word = self.tokenize_quoted_identifier(quote_start, chars)?; + Ok(Some(Token::make_word(&word, Some(quote_start)))) + } + // special (quoted) identifier + _ if self + .dialect + .special_delimited_identifier_start(chars.peekable.clone()) + .is_some() => { - let word = if let Some(nested_quote_start) = self + let (quote_start, nested_delimiter) = self .dialect - .nested_quote_start(quote_start, chars.peekable.clone()) - { - chars.next(); // consume the opening quote + .special_delimited_identifier_start(chars.peekable.clone()) + .unwrap(); - let quote_end = Word::matching_end_quote(quote_start); - let error_loc = chars.location(); + let mut word = vec![]; - peeking_take_while(chars, |ch| ch.is_whitespace()); - let nested_word = - self.tokenize_quoted_identifier(nested_quote_start, 
chars)?; - peeking_take_while(chars, |ch| ch.is_whitespace()); + let identifier_quote_start = if let Some(nested_delimiter) = nested_delimiter { + chars.next(); // skip the first delimiter + word.push(peeking_take_while(chars, |ch| ch.is_whitespace())); + word.push(format!("{nested_delimiter}")); + nested_delimiter + } else { + quote_start + }; - if chars.peek() != Some(&quote_end) { - return self.tokenizer_error( - error_loc, - format!("Expected close delimiter '{quote_end}' before EOF."), - ); - } + word.push(self.tokenize_quoted_identifier(identifier_quote_start, chars)?); - chars.next(); // consume the closing nested quote + if let Some(nested_delimiter) = nested_delimiter { + word.push(format!("{}", Word::matching_end_quote(nested_delimiter))); + word.push(peeking_take_while(chars, |ch| ch.is_whitespace())); + chars.next(); // skip close of first delimiter + } - format!("{nested_quote_start}{nested_word}{nested_quote_start}") - } else { - self.tokenize_quoted_identifier(quote_start, chars)? - }; - Ok(Some(Token::make_word(&word, Some(quote_start)))) + Ok(Some(Token::make_word(&word.concat(), Some(quote_start)))) } // numbers and period '0'..='9' | '.' => {