Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
7phs committed Dec 5, 2024
1 parent 9d5d71b commit 77f2b2c
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 12 deletions.
9 changes: 9 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,15 @@ pub trait Dialect: Debug + Any {
true
}

/// Returns the opening character of a nested quote, if the dialect
/// supports one at this position.
///
/// `_quote_start` is the outer opening quote character and `_chars` is a
/// by-value copy of the character stream, so implementations may advance it
/// freely. The default implementation reports no nested quote; dialects that
/// allow forms such as `["a"]` override this method.
fn nested_quote_start(
    &self,
    _quote_start: char,
    _chars: Peekable<Chars<'_>>,
) -> Option<char> {
    None
}

/// Determine if a character is a valid start character for an unquoted identifier
fn is_identifier_start(&self, ch: char) -> bool;

Expand Down
22 changes: 21 additions & 1 deletion src/dialect/redshift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl Dialect for RedshiftSqlDialect {
// a start character and a quote is a beginning of quoted identifier.
// Skipping analyzing token such as `"a"` and analyze only token that
// can be part of json path potentially.
// For ex., `[0]`, `['a']` (seems part of json path) or `["a"]` (normal quoted identifier)
// For ex., `[0]` (seems part of json path) or `["a"]` (normal quoted identifier)
if let Some(quote_start) = chars.peek() {
if *quote_start == '"' {
return true;
Expand All @@ -63,6 +63,26 @@ impl Dialect for RedshiftSqlDialect {
false
}

/// Redshift supports nested quoted identifiers such as `["a"]`.
///
/// Returns `Some('"')` when `quote_start` is `[` and the first
/// non-whitespace character after it is a double quote; otherwise `None`.
/// `chars` is expected to still be positioned on the outer opening quote.
fn nested_quote_start(
    &self,
    quote_start: char,
    mut chars: Peekable<Chars<'_>>,
) -> Option<char> {
    // A nested quote is only looked for inside square brackets.
    if quote_start != '[' {
        return None;
    }

    chars.next(); // step past the outer opening quote

    // The first non-whitespace character after the bracket decides the result.
    match chars.find(|ch| !ch.is_whitespace()) {
        Some('"') => Some('"'),
        _ => None,
    }
}

fn is_identifier_start(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
Expand Down
58 changes: 47 additions & 11 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1081,19 +1081,34 @@ impl<'a> Tokenizer<'a> {
.dialect
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
{
let error_loc = chars.location();
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
let word = if let Some(nested_quote_start) = self
.dialect
.nested_quote_start(quote_start, chars.peekable.clone())
{
chars.next(); // consume the opening quote

let quote_end = Word::matching_end_quote(quote_start);
let error_loc = chars.location();

peeking_take_while(chars, |ch| ch.is_whitespace());
let nested_word =
self.tokenize_quoted_identifier(nested_quote_start, chars)?;
peeking_take_while(chars, |ch| ch.is_whitespace());

if chars.peek() != Some(&quote_end) {
return self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
);
}

chars.next(); // consume the closing nested quote

if last_char == Some(quote_end) {
Ok(Some(Token::make_word(&s, Some(quote_start))))
format!("{nested_quote_start}{nested_word}{nested_quote_start}")
} else {
self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
)
}
self.tokenize_quoted_identifier(quote_start, chars)?
};
Ok(Some(Token::make_word(&word, Some(quote_start))))
}
// numbers and period
'0'..='9' | '.' => {
Expand Down Expand Up @@ -1597,6 +1612,27 @@ impl<'a> Tokenizer<'a> {
s
}

/// Tokenize a quoted identifier, starting at its opening quote.
///
/// Consumes the opening `quote_start` character, then reads the identifier
/// body via `parse_quoted_ident` up to the matching end quote. Returns the
/// text that `parse_quoted_ident` produced.
///
/// # Errors
///
/// Returns a tokenizer error, located where the opening quote was, when the
/// last character read is not the matching closing delimiter.
fn tokenize_quoted_identifier(
    &self,
    quote_start: char,
    chars: &mut State,
) -> Result<String, TokenizerError> {
    // Capture the location before advancing so an error points at the opening quote.
    let start_loc = chars.location();
    chars.next(); // step over the opening quote

    let quote_end = Word::matching_end_quote(quote_start);
    match self.parse_quoted_ident(chars, quote_end) {
        (ident, Some(last)) if last == quote_end => Ok(ident),
        _ => self.tokenizer_error(
            start_loc,
            format!("Expected close delimiter '{quote_end}' before EOF."),
        ),
    }
}

/// Read a single quoted string, starting with the opening quote.
fn tokenize_escaped_single_quoted_string(
&self,
Expand Down
3 changes: 3 additions & 0 deletions tests/sqlparser_redshift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,9 @@ fn test_parse_select_numbered_columns() {
redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
// RedShift specific case - quoted identifier inside square bracket
redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
}

#[test]
Expand Down

0 comments on commit 77f2b2c

Please sign in to comment.