
Commit

update
7phs committed Dec 5, 2024
1 parent 3868d24 commit 799eded
Showing 3 changed files with 50 additions and 78 deletions.
18 changes: 6 additions & 12 deletions src/dialect/mod.rs
@@ -133,17 +133,11 @@ pub trait Dialect: Debug + Any {
None
}

/// Determine if quoted characters are proper for identifier
fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
true
}

/// Determine if nested quote start is presented and return it
fn nested_quote_start(
/// Determine if a special form of delimited identifier starts here and return its delimiters
fn special_delimited_identifier_start(
&self,
_quote_start: char,
mut _chars: Peekable<Chars<'_>>,
) -> Option<char> {
) -> Option<(char, Option<char>)> {
None
}

@@ -865,11 +859,11 @@ mod tests {
self.0.supports_string_literal_backslash_escape()
}

fn is_proper_identifier_inside_quotes(
fn special_delimited_identifier_start(
&self,
chars: std::iter::Peekable<std::str::Chars<'_>>,
) -> bool {
self.0.is_proper_identifier_inside_quotes(chars)
) -> Option<(char, Option<char>)> {
self.0.special_delimited_identifier_start(chars)
}

fn supports_filter_during_aggregation(&self) -> bool {
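For orientation, here is a minimal sketch of how a third-party dialect might implement the new hook. The returned tuple is read as (outer delimiter, optional nested delimiter), following the Redshift implementation below; the dialect name and the backtick-wrapping rule are hypothetical and only illustrate the signature introduced by this commit.

```rust
use std::iter::Peekable;
use std::str::Chars;

use sqlparser::dialect::Dialect;

// Hypothetical dialect, shown only to illustrate the new trait hook.
#[derive(Debug)]
struct BacktickWrappingDialect;

impl Dialect for BacktickWrappingDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }

    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }

    // Treat a leading backtick as a special delimited identifier start; the
    // nested delimiter is a double quote when one follows the backtick.
    fn special_delimited_identifier_start(
        &self,
        mut chars: Peekable<Chars<'_>>,
    ) -> Option<(char, Option<char>)> {
        if chars.peek() != Some(&'`') {
            return None;
        }
        chars.next(); // look past the opening backtick
        let mut rest = chars.skip_while(|ch| ch.is_whitespace()).peekable();
        match rest.peek() {
            Some(&'"') => Some(('`', Some('"'))), // nested quoted identifier
            Some(&ch) if self.is_identifier_start(ch) => Some(('`', None)),
            _ => None,
        }
    }
}
```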
55 changes: 16 additions & 39 deletions src/dialect/redshift.rs
@@ -32,55 +32,32 @@ pub struct RedshiftSqlDialect {}
// in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will
// be a json path
impl Dialect for RedshiftSqlDialect {
fn is_delimited_identifier_start(&self, ch: char) -> bool {
ch == '"' || ch == '['
}

/// Determine if quoted characters are proper for identifier
/// Determine if the quoted characters look like a special case of quoting beginning with `[`.
/// It's needed to distinguish treating square brackets as quotes from
/// treating them as json path. If there is an identifier then we assume
/// there is no json path.
fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
// PartiQL (used as json path query language in Redshift) uses square bracket as
// a start character and a quote is a beginning of quoted identifier.
// Skipping analyzing token such as `"a"` and analyze only token that
// can be part of json path potentially.
// For ex., `[0]` (seems part of json path) or `["a"]` (normal quoted identifier)
if let Some(quote_start) = chars.peek() {
if *quote_start == '"' {
return true;
}
};
chars.next();
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
if let Some(&ch) = not_white_chars.peek() {
// PartiQL uses single quote as starting identification inside a quote
// It is a normal identifier if it has no single quote at the beginning.
// Square bracket can contain quoted identifier.
// For ex., `["a"]`, but this is not a part of json path, and it is a normal quoted identifier.
return ch == '"' || self.is_identifier_start(ch);
}
false
}

/// RedShift support nested quoted identifier like `["a"]`.
/// Determine if nested quote started and return it.
fn nested_quote_start(
fn special_delimited_identifier_start(
&self,
quote_start: char,
mut chars: Peekable<Chars<'_>>,
) -> Option<char> {
if quote_start != '[' {
) -> Option<(char, Option<char>)> {
if chars.peek() != Some(&'[') {
return None;
}

chars.next(); // skip opening quote start
chars.next();

let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();

if chars.skip_while(|ch| ch.is_whitespace()).peekable().peek() == Some(&'"') {
Some('"')
} else {
None
if let Some(&ch) = not_white_chars.peek() {
if ch == '"' {
return Some(('[', Some('"')));
}
if self.is_identifier_start(ch) {
return Some(('[', None));
}
}

None
}

fn is_identifier_start(&self, ch: char) -> bool {
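Under the implementation above, the hook's decisions for a few inputs would look roughly like this. This is a sketch for illustration only, assuming this branch of the crate is available as `sqlparser`; the actual call site is the tokenizer change below.

```rust
use std::iter::Peekable;
use std::str::Chars;

use sqlparser::dialect::{Dialect, RedshiftSqlDialect};

// Helper that feeds a raw input prefix to the Redshift hook.
fn probe(input: &str) -> Option<(char, Option<char>)> {
    let chars: Peekable<Chars<'_>> = input.chars().peekable();
    RedshiftSqlDialect {}.special_delimited_identifier_start(chars)
}

fn main() {
    // `["a"]`: square brackets wrapping a quoted identifier -> nested delimiter is `"`.
    assert_eq!(probe(r#"["a"]"#), Some(('[', Some('"'))));
    // `[col]`: square brackets wrapping a plain identifier -> no nested delimiter.
    assert_eq!(probe("[col]"), Some(('[', None)));
    // `[0]`: looks like a json path index, not an identifier -> not special.
    assert_eq!(probe("[0]"), None);
    // `"a"`: an ordinary double-quoted identifier is handled by the regular arm.
    assert_eq!(probe(r#""a""#), None);
}
```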
55 changes: 28 additions & 27 deletions src/tokenizer.rs
@@ -1075,40 +1075,41 @@ impl<'a> Tokenizer<'a> {
Ok(Some(Token::DoubleQuotedString(s)))
}
// delimited (quoted) identifier
quote_start
if self.dialect.is_delimited_identifier_start(ch)
&& self
.dialect
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
quote_start if self.dialect.is_delimited_identifier_start(ch) => {
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
Ok(Some(Token::make_word(&word, Some(quote_start))))
}
// special (quoted) identifier
_ if self
.dialect
.special_delimited_identifier_start(chars.peekable.clone())
.is_some() =>
{
let word = if let Some(nested_quote_start) = self
let (quote_start, nested_delimiter) = self
.dialect
.nested_quote_start(quote_start, chars.peekable.clone())
{
chars.next(); // consume the opening quote
.special_delimited_identifier_start(chars.peekable.clone())
.unwrap();

let quote_end = Word::matching_end_quote(quote_start);
let error_loc = chars.location();
let mut word = vec![];

peeking_take_while(chars, |ch| ch.is_whitespace());
let nested_word =
self.tokenize_quoted_identifier(nested_quote_start, chars)?;
peeking_take_while(chars, |ch| ch.is_whitespace());
let identifier_quote_start = if let Some(nested_delimiter) = nested_delimiter {
chars.next(); // consume the opening outer delimiter
word.push(peeking_take_while(chars, |ch| ch.is_whitespace()));
word.push(format!("{nested_delimiter}"));
nested_delimiter
} else {
quote_start
};

if chars.peek() != Some(&quote_end) {
return self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
);
}
word.push(self.tokenize_quoted_identifier(identifier_quote_start, chars)?);

chars.next(); // consume the closing nested quote
if let Some(nested_delimiter) = nested_delimiter {
word.push(format!("{}", Word::matching_end_quote(nested_delimiter)));
word.push(peeking_take_while(chars, |ch| ch.is_whitespace()));
chars.next(); // consume the closing outer delimiter
}

format!("{nested_quote_start}{nested_word}{nested_quote_start}")
} else {
self.tokenize_quoted_identifier(quote_start, chars)?
};
Ok(Some(Token::make_word(&word, Some(quote_start))))
Ok(Some(Token::make_word(&word.concat(), Some(quote_start))))
}
// numbers and period
'0'..='9' | '.' => {
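End to end, a rough sketch of what the new path should produce for a Redshift nested quoted identifier. The expected word value (the inner `"a"` kept verbatim, with `[` as the quote style) is inferred from the concatenation and matching-end-quote logic above; treat it as an assumption about this branch rather than the released crate's behaviour.

```rust
use sqlparser::dialect::RedshiftSqlDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    let dialect = RedshiftSqlDialect {};
    let sql = r#"SELECT ["a"] FROM t"#;

    let mut tokenizer = Tokenizer::new(&dialect, sql);
    let tokens = tokenizer.tokenize().expect("tokenization should succeed");

    // With the change above, `["a"]` should come back as a single word token
    // whose quote style is `[` and whose value keeps the inner quotes: "a".
    for token in &tokens {
        if let Token::Word(word) = token {
            println!("value = {:?}, quote_style = {:?}", word.value, word.quote_style);
        }
    }
}
```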
