Skip to content

Commit

Permalink
Add support for ClickHouse FORMAT on INSERT
Browse files Browse the repository at this point in the history
Adds supports for the `SETTINGS` and `FORMAT` keywords used for
ClickHouse when inserting data with other syntax than SQL. This can
happen e.g. when using the ClickHouse CLI tool to pipe input from files
or similar.
  • Loading branch information
bombsimon committed Dec 30, 2024
1 parent fe36020 commit 08fadd1
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 35 deletions.
30 changes: 26 additions & 4 deletions src/ast/dml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ pub use super::ddl::{ColumnDef, TableConstraint};

use super::{
display_comma_separated, display_separated, ClusteredBy, CommentDef, Expr, FileFormat,
FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens,
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine,
TableWithJoins, Tag, WrappedCollection,
OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, SqliteOnConflict,
TableEngine, TableWithJoins, Tag, WrappedCollection,
};

/// CREATE INDEX statement.
Expand Down Expand Up @@ -495,6 +495,20 @@ pub struct Insert {
pub priority: Option<MysqlInsertPriority>,
/// Only for mysql
pub insert_alias: Option<InsertAliases>,
/// Settings used in together with a specified `FORMAT`.
///
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'`
///
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
/// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats)
pub settings: Option<Vec<Setting>>,
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`,
/// `JSON`, `JSONAsString`, `LineAsString` and more.
///
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}`
///
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
pub format_clause: Option<FormatClause>,
}

impl Display for Insert {
Expand Down Expand Up @@ -547,7 +561,15 @@ impl Display for Insert {
write!(f, "{source}")?;
}

if self.source.is_none() && self.columns.is_empty() {
if let Some(settings) = &self.settings {
write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
}

if let Some(format_clause) = &self.format_clause {
write!(f, "{format_clause}")?;
}

if self.source.is_none() && self.columns.is_empty() && self.format_clause.is_none() {
write!(f, "DEFAULT VALUES")?;
}

Expand Down
15 changes: 13 additions & 2 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2465,14 +2465,25 @@ impl fmt::Display for GroupByExpr {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum FormatClause {
Identifier(Ident),
Identifier {
ident: Ident,
expr: Option<Vec<Expr>>,
},
Null,
}

impl fmt::Display for FormatClause {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident),
FormatClause::Identifier { ident, expr } => {
write!(f, "FORMAT {}", ident)?;

if let Some(exprs) = expr {
write!(f, " {}", display_comma_separated(exprs))?;
}

Ok(())
}
FormatClause::Null => write!(f, "FORMAT NULL"),
}
}
Expand Down
8 changes: 5 additions & 3 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1148,9 +1148,11 @@ impl Spanned for Insert {
table: _, // bool
on,
returning,
replace_into: _, // bool
priority: _, // todo, mysql specific
insert_alias: _, // todo, mysql specific
replace_into: _, // bool
priority: _, // todo, mysql specific
insert_alias: _, // todo, mysql specific
settings: _, // todo, clickhouse specific
format_clause: _, // todo, clickhouse specific
} = self;

union_spans(
Expand Down
8 changes: 8 additions & 0 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,12 @@ impl Dialect for ClickHouseDialect {
fn supports_limit_comma(&self) -> bool {
true
}

// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
//
// [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
fn supports_dictionary_syntax(&self) -> bool {
true
}
}
2 changes: 1 addition & 1 deletion src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
Keyword::PREWHERE,
// for ClickHouse SELECT * FROM t SETTINGS ...
Keyword::SETTINGS,
// for ClickHouse SELECT * FROM t FORMAT...
// for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT...
Keyword::FORMAT,
// for Snowflake START WITH .. CONNECT BY
Keyword::START,
Expand Down
93 changes: 71 additions & 22 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9570,12 +9570,7 @@ impl<'a> Parser<'a> {
let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect)
&& self.parse_keyword(Keyword::FORMAT)
{
if self.parse_keyword(Keyword::NULL) {
Some(FormatClause::Null)
} else {
let ident = self.parse_identifier()?;
Some(FormatClause::Identifier(ident))
}
Some(self.parse_format_clause(false)?)
} else {
None
};
Expand Down Expand Up @@ -11824,30 +11819,53 @@ impl<'a> Parser<'a> {

let is_mysql = dialect_of!(self is MySqlDialect);

let (columns, partitioned, after_columns, source) =
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
(vec![], None, vec![], None)
let (columns, partitioned, after_columns, source) = if self
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
{
(vec![], None, vec![], None)
} else {
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;

let partitioned = self.parse_insert_partition()?;
// Hive allows you to specify columns after partitions as well if you want.
let after_columns = if dialect_of!(self is HiveDialect) {
self.parse_parenthesized_column_list(Optional, false)?
} else {
vec![]
};
(columns, partitioned, after_columns)
} else {
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
Default::default()
};

let partitioned = self.parse_insert_partition()?;
// Hive allows you to specify columns after partitions as well if you want.
let after_columns = if dialect_of!(self is HiveDialect) {
self.parse_parenthesized_column_list(Optional, false)?
} else {
vec![]
};
(columns, partitioned, after_columns)
let source =
if self.peek_keyword(Keyword::FORMAT) || self.peek_keyword(Keyword::SETTINGS) {
None
} else {
Default::default()
Some(self.parse_query()?)
};

let source = Some(self.parse_query()?);
(columns, partitioned, after_columns, source)
};

let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect)
{
// Settings always comes before `FORMAT` for ClickHouse:
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
let settings = self.parse_settings()?;

(columns, partitioned, after_columns, source)
let format = if self.parse_keyword(Keyword::FORMAT) {
Some(self.parse_format_clause(true)?)
} else {
None
};

(format, settings)
} else {
(None, None)
};

let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
&& self.parse_keyword(Keyword::AS)
{
Expand Down Expand Up @@ -11931,10 +11949,41 @@ impl<'a> Parser<'a> {
replace_into,
priority,
insert_alias,
settings,
format_clause,
}))
}
}

// Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and
// `INSERT` and also when using the CLI for pipes. It may or may not take an additional
// expression after the format so we try to parse the expression but allow failure.
//
// Since we know we never take an additional expression in `SELECT` context we never only try
// to parse if `can_have_expression` is true.
//
// <https://clickhouse.com/docs/en/interfaces/formats>
pub fn parse_format_clause(
&mut self,
can_have_expression: bool,
) -> Result<FormatClause, ParserError> {
if self.parse_keyword(Keyword::NULL) {
Ok(FormatClause::Null)
} else {
let ident = self.parse_identifier()?;
let expr = if can_have_expression {
match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) {
Ok(expr) => Some(expr),
_ => None,
}
} else {
None
};

Ok(FormatClause::Identifier { ident, expr })
}
}

/// Returns true if the immediate tokens look like the
/// beginning of a subquery. `(SELECT ...`
fn peek_subquery_start(&mut self) -> bool {
Expand Down
26 changes: 25 additions & 1 deletion tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1378,7 +1378,10 @@ fn test_query_with_format_clause() {
} else {
assert_eq!(
query.format_clause,
Some(FormatClause::Identifier(Ident::new(*format)))
Some(FormatClause::Identifier {
ident: Ident::new(*format),
expr: None
})
);
}
}
Expand All @@ -1398,6 +1401,27 @@ fn test_query_with_format_clause() {
}
}

#[test]
fn test_insert_query_with_format_clause() {
let cases = [
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#,
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#,
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#,
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#,
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#,
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
];

for sql in &cases {
clickhouse_and_generic().verified_stmt(sql);
}
}

#[test]
fn parse_create_table_on_commit_and_as_query() {
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;
Expand Down
10 changes: 8 additions & 2 deletions tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4430,7 +4430,9 @@ fn test_simple_postgres_insert_with_alias() {
returning: None,
replace_into: false,
priority: None,
insert_alias: None
insert_alias: None,
settings: None,
format_clause: None,
})
)
}
Expand Down Expand Up @@ -4500,7 +4502,9 @@ fn test_simple_postgres_insert_with_alias() {
returning: None,
replace_into: false,
priority: None,
insert_alias: None
insert_alias: None,
settings: None,
format_clause: None,
})
)
}
Expand Down Expand Up @@ -4567,6 +4571,8 @@ fn test_simple_insert_with_quoted_alias() {
replace_into: false,
priority: None,
insert_alias: None,
settings: None,
format_clause: None,
})
)
}
Expand Down

0 comments on commit 08fadd1

Please sign in to comment.