Skip to content

Commit

Permalink
Merge pull request #220 from dmaresma/feature/218
Browse files Browse the repository at this point in the history
feature #218 solve my requirement
  • Loading branch information
xnuinside authored Jan 5, 2024
2 parents 8eacab7 + 429a187 commit bb52593
Show file tree
Hide file tree
Showing 8 changed files with 533 additions and 293 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@

**v0.31.3**
### Improvements
#### Snowflake update:
1. Added support for Snowflake Virtual Column definition in External Column `AS ()` statement - https://github.com/xnuinside/simple-ddl-parser/issues/218

**v0.31.2**
### Improvements
#### Snowflake update:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "0.31.2"
version = "0.31.3"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
Expand Down
3 changes: 2 additions & 1 deletion simple_ddl_parser/dialects/hql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
class HQL:
def p_expression_location(self, p):
"""expr : expr LOCATION STRING
| expr LOCATION DQ_STRING"""
| expr LOCATION DQ_STRING
| expr LOCATION table_property_equals"""
p[0] = p[1]
p_list = list(p)
p[0]["location"] = p_list[-1]
Expand Down
57 changes: 56 additions & 1 deletion simple_ddl_parser/dialects/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ def p_expression_cluster_by(self, p):
def p_table_property_equals(self, p: List) -> None:
"""table_property_equals : id id id_or_string
| id id_or_string
| LP id id id_or_string RP
| LP id_or_string RP
"""
p_list = remove_par(list(p))
p[0] = str(p_list[-1])

def p_table_property_equals_int(self, p: List) -> None:
"""table_property_equals_int : id id id_or_string
| id id_or_string
| LP id id id_or_string RP
| LP id_or_string RP
"""
p_list = remove_par(list(p))
p[0] = int(p_list[-1])
Expand All @@ -36,7 +47,7 @@ def p_table_property_equals_bool(self, p: List) -> None:
p[0] = False

def p_expression_data_retention_time_in_days(self, p):
"""expr : expr DATA_RETENTION_TIME_IN_DAYS table_property_equals"""
"""expr : expr DATA_RETENTION_TIME_IN_DAYS table_property_equals_int"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["data_retention_time_in_days"] = p_list[-1]
Expand Down Expand Up @@ -119,3 +130,47 @@ def p_option_with_masking_policy(self, p):
"""
p_list = remove_par(list(p))
p[0] = {"with_masking_policy": f"{p_list[-5]}.{p_list[-3]}.{p_list[-1]}"}

def p_expression_catalog(self, p):
"""expr : expr CATALOG table_property_equals"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["catalog"] = p_list[-1]

def p_expression_file_format(self, p):
"""expr : expr FILE_FORMAT table_property_equals"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["file_format"] = p_list[-1]

def p_expression_stage_file_format(self, p):
"""expr : expr STAGE_FILE_FORMAT table_property_equals"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["stage_file_format"] = p_list[-1]

def p_expression_table_format(self, p):
"""expr : expr TABLE_FORMAT table_property_equals"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["table_format"] = p_list[-1]

def p_expression_auto_refresh(self, p):
"""expr : expr AUTO_REFRESH table_property_equals_bool"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["auto_refresh"] = p_list[-1]

def p_as_virtual(self, p: List):
"""as_virtual : AS LP id LP id LP pid RP COMMA pid RP RP
| AS LP id LP pid RP RP
| AS LP multi_id RP"""
_as = ""
# Simple function else Nested function call
if len(p) == 5:
_as = p[3]
else:
# _as = p[3]+p[4]+p[5]+p[6]+",".join(p[7])+p[8]+p[9]+",".join(p[10])+p[11]
for i in p[3:len(p) - 1]:
_as += i if isinstance(i, str) else ",".join(i)
p[0] = {"generated": {"as": _as}}
3 changes: 2 additions & 1 deletion simple_ddl_parser/dialects/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def p_create_table(self, p: List):
| CREATE OR REPLACE id TABLE
"""
# id - for EXTERNAL, TRANSIENT, TEMPORARY, GLOBAL, LOCAL, TEMP, VOLATILE
# id - for EXTERNAL, TRANSIENT, TEMPORARY, GLOBAL, LOCAL, TEMP, VOLATILE, ICEBERG
# get schema & table name
p[0] = {}
p_list = list(p)
Expand Down Expand Up @@ -401,6 +401,7 @@ def p_defcolumn(self, p: List) -> None:
| defcolumn option_order_noorder
| defcolumn option_with_tag
| defcolumn option_with_masking_policy
| defcolumn as_virtual
"""
p[0] = p[1]
p_list = list(p)
Expand Down
588 changes: 302 additions & 286 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions simple_ddl_parser/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"MASKED": "MASKED",
"WITH": "WITH",
"ORDER": "ORDER",
"NOORDER": "NOORDER"
"NOORDER": "NOORDER",
}
first_liners = {
"LIKE": "LIKE",
Expand Down Expand Up @@ -100,6 +100,11 @@
"DATA_RETENTION_TIME_IN_DAYS": "DATA_RETENTION_TIME_IN_DAYS",
"MAX_DATA_EXTENSION_TIME_IN_DAYS": "MAX_DATA_EXTENSION_TIME_IN_DAYS",
"CHANGE_TRACKING": "CHANGE_TRACKING",
"AUTO_REFRESH": "AUTO_REFRESH",
"FILE_FORMAT": "FILE_FORMAT",
"TABLE_FORMAT": "TABLE_FORMAT",
"STAGE_FILE_FORMAT": "STAGE_FILE_FORMAT",
"CATALOG": "CATALOG",
}
sequence_reserved = {
"INCREMENT": "INCREMENT",
Expand All @@ -111,7 +116,7 @@
"NO": "NO",
"BY": "BY",
"NOORDER": "NOORDER",
"ORDER": "ORDER"
"ORDER": "ORDER",
}


Expand Down
159 changes: 158 additions & 1 deletion tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,4 +767,161 @@ def test_order_sequence():
"noorder": True,
}
]
assert expected == parse_results
assert expected == parse_results

def test_virtual_column_ext_table():
ddl = """
create or replace external table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB (
"type" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 1), '=', 2 )),
"year" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 2), '=', 2)),
"month" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 3), '=', 2)),
"day" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 4), '=', 2)),
"path" VARCHAR(255) AS (METADATA$FILENAME)
)
partition by ("type", "year", "month", "day", "path")
location = @ADL_Azure_Storage_Account_Container_Name/
auto_refresh = false
;
"""
result_ext_table = DDLParser(ddl, normalize_names=True, debug=True).run(
output_mode="snowflake"
)

expected_ext_table = [
{
"alter": {},
"checks": [],
"clone": None,
"columns": [
{
"name": "type",
"type": "VARCHAR",
"size": 255,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated" : {"as" : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME,'/',1),'=',2)" }
} ,
{
"name": "year",
"type": "VARCHAR",
"size": 255,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated" : {"as" : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME,'/',2),'=',2)" }
},
{
"name": "month",
"type": "VARCHAR",
"size": 255,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated" : {"as" : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME,'/',3),'=',2)"}
},
{
"name": "day",
"type": "VARCHAR",
"size": 255,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated" : {"as" : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME,'/',4),'=',2)"}
},
{
"name": "path",
"type": "VARCHAR",
"size": 255,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated" : {"as" : "METADATA$FILENAME" }
}
],
"index": [],
"partition_by": { "columns" :["type", "year", "month", "day", "path"], "type" : None},
"partitioned_by" : [],
"primary_key": [],
"primary_key_enforced": None,
"auto_refresh" : False,
"schema": "TABLE_DATA_SRC",
"table_name": "EXT_PAYLOAD_MANIFEST_WEB",
"tablespace": None,
"replace" : True,
"if_not_exists": True,
"location" : "@ADL_Azure_Storage_Account_Container_Name/",
}
]

assert result_ext_table == expected_ext_table

def test_virtual_column_table():
ddl = """
create or replace table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB (
id bigint,
derived bigint as (id * 10)
)
partition by ("type", "year", "month", "day", "path")
location = @ADL_Azure_Storage_Account_Container_Name/
auto_refresh = false
;
"""
result_ext_table = DDLParser(ddl, normalize_names=True, debug=True).run(
output_mode="snowflake"
)

expected_ext_table = [
{
"alter": {},
"checks": [],
"clone": None,
"columns": [
{
"name": "id",
"type": "bigint",
"size": None,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
},
{
"name": "derived",
"type": "bigint",
"size": None,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated" : {"as" : "id * 10" }
}
],
"index": [],
"partition_by": { "columns" :["type", "year", "month", "day", "path"], "type" : None},
"partitioned_by" : [],
"primary_key": [],
"primary_key_enforced": None,
"auto_refresh" : False,
"schema": "TABLE_DATA_SRC",
"table_name": "EXT_PAYLOAD_MANIFEST_WEB",
"tablespace": None,
"replace" : True,
"if_not_exists": True,
"location" : "@ADL_Azure_Storage_Account_Container_Name/",
}
]

assert result_ext_table == expected_ext_table

0 comments on commit bb52593

Please sign in to comment.